xref: /XiangShan/src/main/scala/xiangshan/cache/mmu/TLB.scala (revision d7dd2491d434d4cb5125f72a4c58b3ce848a7178)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Binh Pham, Viswanathan Vaidyanathan, Aamer Jaleel, and Abhishek Bhattacharjee. "[Colt: Coalesced large-reach
* tlbs.](https://doi.org/10.1109/MICRO.2012.32)" 45th Annual IEEE/ACM International Symposium on Microarchitecture
* (MICRO). 2012.
***************************************************************************************/

package xiangshan.cache.mmu

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import difftest._
import freechips.rocketchip.util.SRAMAnnotation
import xiangshan._
import utils._
import utility._
import xiangshan.backend.fu.{PMPChecker, PMPReqBundle, PMPConfig => XSPMPConfig}
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.fu.util.HasCSRConst
import freechips.rocketchip.rocket.PMPConfig

/** TLB module
  * supports blocking and non-blocking request IO at the same time
  * returns paddr in the next cycle, which then goes on to the PMP/PMA check
  * @param Width: the number of requestors
  * @param Block: whether each requestor port is blocking
  * @param q: TLB parameters, like entry number; each TLB has its own parameters
  * @param p: XiangShan parameters, like XLEN
  */

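// Illustrative instantiation (hypothetical parameter values, not taken from this file):
//   val dtlb = Module(new TLB(Width = 2, nRespDups = 2, Block = Seq(false, false), q = ldtlbParams))
// Each of the `Width` requestors gets its own req/resp channel in TlbIO; `Block(i)`
// selects handle_block or handle_nonblock for port i (see the two handlers below).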
class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)(implicit p: Parameters) extends TlbModule
  with HasCSRConst
  with HasPerfEvents
{
  val io = IO(new TlbIO(Width, nRespDups, q))

  val req = io.requestor.map(_.req)
  val resp = io.requestor.map(_.resp)
  val ptw = io.ptw
  val pmp = io.pmp
  val refill_to_mem = io.refill_to_mem

  /** Sfence.vma & Svinval
    * Sfence.vma will 1. flush old entries 2. flush inflight 3. flush pipe
    * Svinval will 1. flush old entries 2. flush inflight
    * So Svinval will not flush the pipe, which means
    * it should not drop reqs from the pipe and should return the right resp
    */
  val sfence = DelayN(io.sfence, q.fenceDelay)
  val csr = DelayN(io.csr, q.fenceDelay)

  val flush_mmu = sfence.valid || csr.satp.changed || csr.vsatp.changed || csr.hgatp.changed
  val mmu_flush_pipe = sfence.valid && sfence.bits.flushPipe // for svinval, won't flush pipe
  val flush_pipe = io.flushPipe
  val redirect = io.redirect
  val EffectiveVa = Wire(Vec(Width, UInt(XLEN.W)))
  val req_in = req
  val req_out = Reg(Vec(Width, new TlbReq))
  for (i <- 0 until Width) {
    when (req(i).fire) {
      req_out(i) := req(i).bits
      req_out(i).fullva := EffectiveVa(i)
    }
  }
  val req_out_v = (0 until Width).map(i => ValidHold(req_in(i).fire && !req_in(i).bits.kill, resp(i).fire, flush_pipe(i)))
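  // req_out_v(i): port i has an in-flight request. ValidHold sets it when a
  // non-killed request fires and clears it when the response fires or the
  // pipe is flushed.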

  val isHyperInst = (0 until Width).map(i => req_out_v(i) && req_out(i).hyperinst)

  // ATTENTION: csr and flush from the backend are delayed. csr should not arrive later than flush,
  // because csr influences tlb behavior.
  val ifetch = if (q.fetchi) true.B else false.B
  val mode_tmp = if (q.useDmode) csr.priv.dmode else csr.priv.imode
  val mode = (0 until Width).map(i => Mux(isHyperInst(i), csr.priv.spvp, mode_tmp))
  val virt_in = csr.priv.virt
  val virt_out = req.map(a => RegEnable(csr.priv.virt, a.fire))
  val sum = (0 until Width).map(i => Mux(virt_out(i) || isHyperInst(i), csr.priv.vsum, csr.priv.sum))
  val mxr = (0 until Width).map(i => Mux(virt_out(i) || isHyperInst(i), csr.priv.vmxr || csr.priv.mxr, csr.priv.mxr))
  val req_in_s2xlate = (0 until Width).map(i => MuxCase(noS2xlate, Seq(
      (!(virt_in || req_in(i).bits.hyperinst)) -> noS2xlate,
      (csr.vsatp.mode =/= 0.U && csr.hgatp.mode =/= 0.U) -> allStage,
      (csr.vsatp.mode === 0.U) -> onlyStage2,
      (csr.hgatp.mode === 0.U) -> onlyStage1
    )))
  val req_out_s2xlate = (0 until Width).map(i => MuxCase(noS2xlate, Seq(
    (!(virt_out(i) || isHyperInst(i))) -> noS2xlate,
    (csr.vsatp.mode =/= 0.U && csr.hgatp.mode =/= 0.U) -> allStage,
    (csr.vsatp.mode === 0.U) -> onlyStage2,
    (csr.hgatp.mode === 0.U) -> onlyStage1
  )))
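  // MuxCase is priority-ordered: no virtualization (and no hyperinst) means
  // noS2xlate; otherwise both vsatp and hgatp enabled selects allStage,
  // vsatp == Bare selects onlyStage2, and hgatp == Bare selects onlyStage1.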
  val need_gpa = RegInit(false.B)
  val need_gpa_wire = WireInit(false.B)
  val need_gpa_robidx = Reg(new RobPtr)
  val need_gpa_vpn = Reg(UInt(vpnLen.W))
  val resp_gpa_gvpn = Reg(UInt(ptePPNLen.W))
  val resp_gpa_refill = RegInit(false.B)
  val resp_s1_level = RegInit(0.U(log2Up(Level + 1).W))
  val resp_s1_isLeaf = RegInit(false.B)
  val resp_s1_isFakePte = RegInit(false.B)
  val hasGpf = Wire(Vec(Width, Bool()))

  val Sv39Enable = csr.satp.mode === 8.U
  val Sv48Enable = csr.satp.mode === 9.U
  val Sv39vsEnable = csr.vsatp.mode === 8.U
  val Sv48vsEnable = csr.vsatp.mode === 9.U
  val Sv39x4Enable = csr.hgatp.mode === 8.U
  val Sv48x4Enable = csr.hgatp.mode === 9.U
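  // Mode encodings follow the RISC-V privileged spec: 8 = Sv39 (Sv39x4 for
  // hgatp), 9 = Sv48 (Sv48x4 for hgatp); 0 = Bare.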

  val vmEnable = (0 until Width).map(i => !(isHyperInst(i) || virt_out(i)) && (
    if (EnbaleTlbDebug) (Sv39Enable || Sv48Enable)
    else (Sv39Enable || Sv48Enable) && (mode(i) < ModeM))
  )
  val s2xlateEnable = (0 until Width).map(i =>
    (isHyperInst(i) || virt_out(i)) &&
    (Sv39vsEnable || Sv48vsEnable || Sv39x4Enable || Sv48x4Enable) &&
    (mode(i) < ModeM)
  )
  val portTranslateEnable = (0 until Width).map(i => (vmEnable(i) || s2xlateEnable(i)) && RegEnable(!req(i).bits.no_translate, req(i).valid))
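  // A port actually translates only if stage-1 (vmEnable) or stage-2
  // (s2xlateEnable) translation applies and the request did not ask to skip
  // translation (no_translate, registered to line up with req_out).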

  // pre fault: check faults before actually translating
  val prepf = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val pregpf = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val preaf = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val premode = (0 until Width).map(i => Mux(req_in(i).bits.hyperinst, csr.priv.spvp, mode_tmp))
  for (i <- 0 until Width) {
    resp(i).bits.fullva := RegEnable(EffectiveVa(i), req(i).valid)
  }
  val prevmEnable = (0 until Width).map(i => !(virt_in || req_in(i).bits.hyperinst) && (
    if (EnbaleTlbDebug) (Sv39Enable || Sv48Enable)
    else (Sv39Enable || Sv48Enable) && (premode(i) < ModeM))
  )
  val pres2xlateEnable = (0 until Width).map(i =>
    (virt_in || req_in(i).bits.hyperinst) &&
    (Sv39vsEnable || Sv48vsEnable || Sv39x4Enable || Sv48x4Enable) &&
    (premode(i) < ModeM)
  )

  (0 until Width).foreach{i =>

    val pmm = WireInit(0.U(2.W))

    when (ifetch || req(i).bits.hlvx) {
      pmm := 0.U
    } .elsewhen (premode(i) === ModeM) {
      pmm := csr.pmm.mseccfg
    } .elsewhen (!(virt_in || req_in(i).bits.hyperinst) && premode(i) === ModeS) {
      pmm := csr.pmm.menvcfg
    } .elsewhen ((virt_in || req_in(i).bits.hyperinst) && premode(i) === ModeS) {
      pmm := csr.pmm.henvcfg
    } .elsewhen (req_in(i).bits.hyperinst && csr.priv.imode === ModeU) {
      pmm := csr.pmm.hstatus
    } .elsewhen (premode(i) === ModeU) {
      pmm := csr.pmm.senvcfg
    }

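    // Pointer masking (Smnpm/Ssnpm): PMLEN7 keeps effective-address bits 56:0
    // and PMLEN16 keeps bits 47:0. With translation on, the masked-off high
    // bits are replaced by sign extension; in bare mode they are zeroed.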
    when (prevmEnable(i) || (pres2xlateEnable(i) && csr.vsatp.mode =/= 0.U)) {
      when (pmm === PMLEN7) {
        EffectiveVa(i) := SignExt(req_in(i).bits.fullva(56, 0), XLEN)
      } .elsewhen (pmm === PMLEN16) {
        EffectiveVa(i) := SignExt(req_in(i).bits.fullva(47, 0), XLEN)
      } .otherwise {
        EffectiveVa(i) := req_in(i).bits.fullva
      }
    } .otherwise {
      when (pmm === PMLEN7) {
        EffectiveVa(i) := ZeroExt(req_in(i).bits.fullva(56, 0), XLEN)
      } .elsewhen (pmm === PMLEN16) {
        EffectiveVa(i) := ZeroExt(req_in(i).bits.fullva(47, 0), XLEN)
      } .otherwise {
        EffectiveVa(i) := req_in(i).bits.fullva
      }
    }

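    // Canonicality checks: for Sv48/Sv39 the bits above VA bit 47/38 must all
    // equal the sign bit (pf48/pf39). Guest-physical addresses are at most
    // 48+2/39+2 bits wide in Sv48x4/Sv39x4, so any higher bit raises a guest
    // page fault (gpf48/gpf39); in bare mode, bits above PAddrBits raise an
    // access fault (af).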
    val pf48 = SignExt(EffectiveVa(i)(47, 0), XLEN) =/= EffectiveVa(i)
    val pf39 = SignExt(EffectiveVa(i)(38, 0), XLEN) =/= EffectiveVa(i)
    val gpf48 = EffectiveVa(i)(XLEN - 1, 48 + 2) =/= 0.U
    val gpf39 = EffectiveVa(i)(XLEN - 1, 39 + 2) =/= 0.U
    val af = EffectiveVa(i)(XLEN - 1, PAddrBits) =/= 0.U
    when (req(i).valid && req(i).bits.checkfullva) {
      when (prevmEnable(i) || pres2xlateEnable(i)) {
        when (req_in_s2xlate(i) === onlyStage2) {
          when (Sv48x4Enable) {
            pregpf(i) := gpf48
          } .elsewhen (Sv39x4Enable) {
            pregpf(i) := gpf39
          }
        } .elsewhen (req_in_s2xlate(i) === onlyStage1 || req_in_s2xlate(i) === allStage) {
          when (Sv48vsEnable) {
            prepf(i) := pf48
          } .elsewhen (Sv39vsEnable) {
            prepf(i) := pf39
          }
        } .otherwise { // noS2xlate
          when (Sv48Enable) {
            prepf(i) := pf48
          } .elsewhen (Sv39Enable) {
            prepf(i) := pf39
          }
        }
      } .otherwise {
        preaf(i) := af
      }
    }
  }

  val refill = ptw.resp.fire && !(ptw.resp.bits.getGpa) && !need_gpa && !need_gpa_wire && !flush_mmu
  // prevent ptw refill when: 1) it's a getGpa request; 2) l1tlb is in need_gpa state; 3) mmu is being flushed.

  refill_to_mem := DontCare
  val entries = Module(new TlbStorageWrapper(Width, q, nRespDups))
  entries.io.base_connect(sfence, csr, csr.satp)
  if (q.outReplace) { io.replace <> entries.io.replace }
  for (i <- 0 until Width) {
    entries.io.r_req_apply(io.requestor(i).req.valid, get_pn(req_in(i).bits.vaddr), i, req_in_s2xlate(i))
    entries.io.w_apply(refill, ptw.resp.bits)
    // TODO: RegNext enable: req.valid
    resp(i).bits.debug.isFirstIssue := RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid)
    resp(i).bits.debug.robIdx := RegEnable(req(i).bits.debug.robIdx, req(i).valid)
  }

  // read TLB, get hit/miss, paddr, perm bits
  val readResult = (0 until Width).map(TLBRead(_))
  val hitVec = readResult.map(_._1)
  val missVec = readResult.map(_._2)
  val pmp_addr = readResult.map(_._3)
  val perm = readResult.map(_._4)
  val g_perm = readResult.map(_._5)
  val pbmt = readResult.map(_._6)
  val g_pbmt = readResult.map(_._7)
  // check pmp using paddr (for timing optimization, use pmp_addr here)
  // check permission
  (0 until Width).foreach{i =>
    val noTranslateReg = RegNext(req(i).bits.no_translate)
    val addr = Mux(noTranslateReg, req(i).bits.pmp_addr, pmp_addr(i))
    pmp_check(addr, req_out(i).size, req_out(i).cmd, noTranslateReg, i)
    for (d <- 0 until nRespDups) {
      pbmt_check(i, d, pbmt(i)(d), g_pbmt(i)(d), req_out_s2xlate(i))
      perm_check(perm(i)(d), req_out(i).cmd, i, d, g_perm(i)(d), req_out(i).hlvx, req_out_s2xlate(i), prepf(i), pregpf(i), preaf(i))
    }
    hasGpf(i) := hitVec(i) && (resp(i).bits.excp(0).gpf.ld || resp(i).bits.excp(0).gpf.st || resp(i).bits.excp(0).gpf.instr)
  }

  // handle block or non-block io
  // for non-block io, just return the above result and send the miss to the ptw
  // for block io, hold the request, send the miss to the ptw,
  //   and when the ptw comes back, return the result
  (0 until Width) foreach {i =>
    if (Block(i)) handle_block(i)
    else handle_nonblock(i)
  }
  io.ptw.resp.ready := true.B

  /************************  main body above | method/log/perf below ****************************/
  def TLBRead(i: Int) = {
    val (e_hit, e_ppn, e_perm, e_g_perm, e_s2xlate, e_pbmt, e_g_pbmt) = entries.io.r_resp_apply(i)
    val (p_hit, p_ppn, p_pbmt, p_perm, p_gvpn, p_g_pbmt, p_g_perm, p_s2xlate, p_s1_level, p_s1_isLeaf, p_s1_isFakePte) = ptw_resp_bypass(get_pn(req_in(i).bits.vaddr), req_in_s2xlate(i))
    val enable = portTranslateEnable(i)
    val isOnlys2xlate = req_out_s2xlate(i) === onlyStage2
    val need_gpa_vpn_hit = need_gpa_vpn === get_pn(req_out(i).vaddr)
    val isitlb = TlbCmd.isExec(req_out(i).cmd)
    val isPrefetch = req_out(i).isPrefetch
    val currentRedirect = req_out(i).debug.robIdx.needFlush(redirect)
    val lastCycleRedirect = req_out(i).debug.robIdx.needFlush(RegNext(redirect))

    when (!isitlb && need_gpa_robidx.needFlush(redirect) || isitlb && flush_pipe(i)){
      need_gpa := false.B
      resp_gpa_refill := false.B
      need_gpa_vpn := 0.U
    }.elsewhen (req_out_v(i) && !p_hit && !(resp_gpa_refill && need_gpa_vpn_hit) && !isOnlys2xlate && hasGpf(i) && need_gpa === false.B && !io.requestor(i).req_kill && !isPrefetch && !currentRedirect && !lastCycleRedirect) {
      need_gpa_wire := true.B
      need_gpa := true.B
      need_gpa_vpn := get_pn(req_out(i).vaddr)
      resp_gpa_refill := false.B
      need_gpa_robidx := req_out(i).debug.robIdx
    }.elsewhen (ptw.resp.fire && need_gpa && need_gpa_vpn === ptw.resp.bits.getVpn(need_gpa_vpn)) {
      resp_gpa_gvpn := Mux(ptw.resp.bits.s2xlate === onlyStage2, ptw.resp.bits.s2.entry.tag, ptw.resp.bits.s1.genGVPN(need_gpa_vpn))
      resp_s1_level := ptw.resp.bits.s1.entry.level.get
      resp_s1_isLeaf := ptw.resp.bits.s1.isLeaf()
      resp_s1_isFakePte := ptw.resp.bits.s1.isFakePte()
      resp_gpa_refill := true.B
    }

    when (req_out_v(i) && hasGpf(i) && resp_gpa_refill && need_gpa_vpn_hit){
      need_gpa := false.B
    }

    val hit = e_hit || p_hit
    val miss = (!hit && enable) || hasGpf(i) && !p_hit && !(resp_gpa_refill && need_gpa_vpn_hit) && !isOnlys2xlate && !isPrefetch && !lastCycleRedirect
    hit.suggestName(s"hit_read_${i}")
    miss.suggestName(s"miss_read_${i}")
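    // A port misses either because no entry hit while translation is enabled,
    // or because a guest page fault needs a gpaddr that has not been refilled
    // yet and a retry is still worthwhile (not onlyStage2, not a prefetch,
    // not already redirected).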

    val vaddr = SignExt(req_out(i).vaddr, PAddrBits)
    resp(i).bits.miss := miss
    resp(i).bits.ptwBack := ptw.resp.fire
    resp(i).bits.memidx := RegEnable(req_in(i).bits.memidx, req_in(i).valid)
    resp(i).bits.fastMiss := !hit && enable

    val ppn = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ppnLen.W))))
    val pbmt = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ptePbmtLen.W))))
    val perm = WireInit(VecInit(Seq.fill(nRespDups)(0.U.asTypeOf(new TlbPermBundle))))
    val gvpn = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ptePPNLen.W))))
    val level = WireInit(VecInit(Seq.fill(nRespDups)(0.U(log2Up(Level + 1).W))))
    val isLeaf = WireInit(VecInit(Seq.fill(nRespDups)(false.B)))
    val isFakePte = WireInit(VecInit(Seq.fill(nRespDups)(false.B)))
    val g_pbmt = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ptePbmtLen.W))))
    val g_perm = WireInit(VecInit(Seq.fill(nRespDups)(0.U.asTypeOf(new TlbPermBundle))))
    val r_s2xlate = WireInit(VecInit(Seq.fill(nRespDups)(0.U(2.W))))
    for (d <- 0 until nRespDups) {
      ppn(d) := Mux(p_hit, p_ppn, e_ppn(d))
      pbmt(d) := Mux(p_hit, p_pbmt, e_pbmt(d))
      perm(d) := Mux(p_hit, p_perm, e_perm(d))
      gvpn(d) := Mux(p_hit, p_gvpn, resp_gpa_gvpn)
      level(d) := Mux(p_hit, p_s1_level, resp_s1_level)
      isLeaf(d) := Mux(p_hit, p_s1_isLeaf, resp_s1_isLeaf)
      isFakePte(d) := Mux(p_hit, p_s1_isFakePte, resp_s1_isFakePte)
      g_pbmt(d) := Mux(p_hit, p_g_pbmt, e_g_pbmt(d))
      g_perm(d) := Mux(p_hit, p_g_perm, e_g_perm(d))
      r_s2xlate(d) := Mux(p_hit, p_s2xlate, e_s2xlate(d))
      val paddr = Cat(ppn(d), get_off(req_out(i).vaddr))
      val vpn_idx = Mux1H(Seq(
        (isFakePte(d) && csr.vsatp.mode === Sv39) -> 2.U,
        (isFakePte(d) && csr.vsatp.mode === Sv48) -> 3.U,
        (!isFakePte(d)) -> (level(d) - 1.U),
      ))
      // We use `fullva` here when `isLeaf`, in order to cope with an unaligned load/store crossing a page boundary.
      // For example, a `ld` instruction at address 0x81000ffb will be split into two loads:
      // 1. ld 0x81000ff8. vaddr = 0x81000ff8, fullva = 0x81000ffb
      // 2. ld 0x81001000. vaddr = 0x81001000, fullva = 0x81000ffb
      // When load 1 triggers a guest page fault, we should use the offset of fullva to generate gpaddr,
      // and when load 2 triggers a guest page fault, we should just use the offset of vaddr (all zero).
      // Also, when onlyS2: if the access crosses a page, gpaddr = vaddr (the start address of the new page); else gpaddr = fullva (the original vaddr).
      // By the way, the frontend handles cross-page instruction fetch by itself, so the TLB doesn't need to do anything extra.
      // Also, the fullva of the iTLB is unused and always zero; crossPageVaddr should never use fullva in the iTLB.
      val crossPageVaddr = Mux(isitlb || req_out(i).fullva(12) =/= vaddr(12), vaddr, req_out(i).fullva)
      val gpaddr_offset = Mux(isLeaf(d), get_off(crossPageVaddr), Cat(getVpnn(get_pn(crossPageVaddr), vpn_idx), 0.U(log2Up(XLEN/8).W)))
      val gpaddr = Cat(gvpn(d), gpaddr_offset)
      resp(i).bits.paddr(d) := Mux(enable, paddr, vaddr)
      resp(i).bits.gpaddr(d) := Mux(r_s2xlate(d) === onlyStage2, crossPageVaddr, gpaddr)
    }

    XSDebug(req_out_v(i), p"(${i.U}) hit:${hit} miss:${miss} ppn:${Hexadecimal(ppn(0))} perm:${perm(0)}\n")

    val pmp_paddr = resp(i).bits.paddr(0)

    (hit, miss, pmp_paddr, perm, g_perm, pbmt, g_pbmt)
  }

  def getVpnn(vpn: UInt, idx: UInt): UInt = {
    MuxLookup(idx, 0.U)(Seq(
      0.U -> vpn(vpnnLen - 1, 0),
      1.U -> vpn(vpnnLen * 2 - 1, vpnnLen),
      2.U -> vpn(vpnnLen * 3 - 1, vpnnLen * 2),
      3.U -> vpn(vpnnLen * 4 - 1, vpnnLen * 3))
    )
  }
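  // e.g. with the 9-bit VPN segments of Sv39/Sv48 (vpnnLen = 9), idx = 2
  // selects vpn(26, 18), the slice that indexes that level of the page table.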

  def pmp_check(addr: UInt, size: UInt, cmd: UInt, noTranslate: Bool, idx: Int): Unit = {
    pmp(idx).valid := resp(idx).valid || noTranslate
    pmp(idx).bits.addr := addr
    pmp(idx).bits.size := size
    pmp(idx).bits.cmd := cmd
  }

  def pbmt_check(idx: Int, d: Int, pbmt: UInt, g_pbmt: UInt, s2xlate: UInt): Unit = {
    val onlyS1 = s2xlate === onlyStage1 || s2xlate === noS2xlate
    val pbmtRes = pbmt
    val gpbmtRes = g_pbmt
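    // Combine Svpbmt across stages: in allStage translation the stage-1 PBMT
    // wins unless it is PMA (0), in which case the stage-2 (G-stage) PBMT
    // applies.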
    val res = MuxLookup(s2xlate, 0.U)(Seq(
      onlyStage1 -> pbmtRes,
      onlyStage2 -> gpbmtRes,
      allStage -> Mux(pbmtRes =/= 0.U, pbmtRes, gpbmtRes),
      noS2xlate -> pbmtRes
    ))
    resp(idx).bits.pbmt(d) := Mux(portTranslateEnable(idx), res, 0.U)
  }

  // for timing optimization, pmp check is divided into dynamic and static
  def perm_check(perm: TlbPermBundle, cmd: UInt, idx: Int, nDups: Int, g_perm: TlbPermBundle, hlvx: Bool, s2xlate: UInt, prepf: Bool = false.B, pregpf: Bool = false.B, preaf: Bool = false.B) = {
    // dynamic: superpage (or fully-associative reg entries) -> check pmp when translation is done
    // static: 4K pages (or sram entries) -> check pmp with pre-checked results
    val hasS2xlate = s2xlate =/= noS2xlate
    val onlyS1 = s2xlate === onlyStage1
    val onlyS2 = s2xlate === onlyStage2
    val allS2xlate = s2xlate === allStage
    // noS2xlate || onlyS1 -> perm.af
    // onlyS2 -> g_perm.af
    // allS2xlate -> perm.af || g_perm.af
    val af = (!onlyS2 && perm.af) || ((onlyS2 || allS2xlate) && g_perm.af)

    // Stage 1 perm check
    val pf = perm.pf
    val isLd = TlbCmd.isRead(cmd) && !TlbCmd.isAmo(cmd)
    val isSt = TlbCmd.isWrite(cmd) || TlbCmd.isAmo(cmd)
    val isInst = TlbCmd.isExec(cmd)
    val ldUpdate = !perm.a && isLd // update A/D through exception
    val stUpdate = (!perm.a || !perm.d) && isSt // update A/D through exception
    val instrUpdate = !perm.a && isInst // update A/D through exception
    val modeCheck = !(mode(idx) === ModeU && !perm.u || mode(idx) === ModeS && perm.u && (!sum(idx) || ifetch))
    val ldPermFail = !(modeCheck && Mux(hlvx, perm.x, perm.r || mxr(idx) && perm.x))
    val stPermFail = !(modeCheck && perm.w)
    val instrPermFail = !(modeCheck && perm.x)
    val ldPf = (ldPermFail || pf) && isLd
    val stPf = (stPermFail || pf) && isSt
    val instrPf = (instrPermFail || pf) && isInst
    val isFakePte = !perm.v && !perm.pf && !perm.af && !onlyS2
    val isNonLeaf = !(perm.r || perm.w || perm.x) && perm.v && !perm.pf && !perm.af
    val s1_valid = portTranslateEnable(idx) && !onlyS2

    // Stage 2 perm check
    val gpf = g_perm.pf
    val g_ldUpdate = !g_perm.a && isLd
    val g_stUpdate = (!g_perm.a || !g_perm.d) && isSt
    val g_instrUpdate = !g_perm.a && isInst
    val g_ldPermFail = !Mux(hlvx, g_perm.x, (g_perm.r || csr.priv.mxr && g_perm.x))
    val g_stPermFail = !g_perm.w
    val g_instrPermFail = !g_perm.x
    val ldGpf = (g_ldPermFail || gpf) && isLd
    val stGpf = (g_stPermFail || gpf) && isSt
    val instrGpf = (g_instrPermFail || gpf) && isInst
    val s2_valid = portTranslateEnable(idx) && (onlyS2 || allS2xlate)
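    // s1_valid/s2_valid gate the stage-1/stage-2 fault signals on whether
    // that translation stage is actually active for this port.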

    val fault_valid = s1_valid || s2_valid

    // pf and gpf can't happen simultaneously
    val hasPf = (ldPf || ldUpdate || stPf || stUpdate || instrPf || instrUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
    // Only the LSU needs the checks related to high-address truncation
    when (RegNext(prepf || pregpf || preaf)) {
      resp(idx).bits.isForVSnonLeafPTE := false.B
      resp(idx).bits.excp(nDups).pf.ld := RegNext(prepf) && isLd
      resp(idx).bits.excp(nDups).pf.st := RegNext(prepf) && isSt
      resp(idx).bits.excp(nDups).pf.instr := false.B

      resp(idx).bits.excp(nDups).gpf.ld := RegNext(pregpf) && isLd
      resp(idx).bits.excp(nDups).gpf.st := RegNext(pregpf) && isSt
      resp(idx).bits.excp(nDups).gpf.instr := false.B

      resp(idx).bits.excp(nDups).af.ld := RegNext(preaf) && TlbCmd.isRead(cmd)
      resp(idx).bits.excp(nDups).af.st := RegNext(preaf) && TlbCmd.isWrite(cmd)
      resp(idx).bits.excp(nDups).af.instr := false.B

      resp(idx).bits.excp(nDups).vaNeedExt := false.B
      // overwrite miss & gpaddr when an exception related to high-address truncation happens
      resp(idx).bits.miss := false.B
      resp(idx).bits.gpaddr(nDups) := req_out(idx).fullva
    } .otherwise {
      // isForVSnonLeafPTE is used only when a gpf happens and is caused by a G-stage translation performed on behalf of VS-stage translation.
      // It is sent to the CSR in order to construct the m/htinst.
      // Ref: The RISC-V Instruction Set Manual: Volume II: Privileged Architecture - 19.6.3. Transformed Instruction or Pseudoinstruction for mtinst or htinst
      val isForVSnonLeafPTE = isNonLeaf || isFakePte
      resp(idx).bits.isForVSnonLeafPTE := isForVSnonLeafPTE
      resp(idx).bits.excp(nDups).pf.ld := (ldPf || ldUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
      resp(idx).bits.excp(nDups).pf.st := (stPf || stUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
      resp(idx).bits.excp(nDups).pf.instr := (instrPf || instrUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
      // NOTE: pf needs the && with !af: a page fault normally has higher priority than an access fault,
      // but the ptw may itself take an access fault, in which case the translation is wrong
      // and pf has lower priority than af.

      resp(idx).bits.excp(nDups).gpf.ld := (ldGpf || g_ldUpdate) && s2_valid && !af && !hasPf
      resp(idx).bits.excp(nDups).gpf.st := (stGpf || g_stUpdate) && s2_valid && !af && !hasPf
      resp(idx).bits.excp(nDups).gpf.instr := (instrGpf || g_instrUpdate) && s2_valid && !af && !hasPf

      resp(idx).bits.excp(nDups).af.ld    := af && TlbCmd.isRead(cmd) && fault_valid
      resp(idx).bits.excp(nDups).af.st    := af && TlbCmd.isWrite(cmd) && fault_valid
      resp(idx).bits.excp(nDups).af.instr := af && TlbCmd.isExec(cmd) && fault_valid

      resp(idx).bits.excp(nDups).vaNeedExt := true.B
    }

    resp(idx).bits.excp(nDups).isHyper := isHyperInst(idx)
  }

  def handle_nonblock(idx: Int): Unit = {
    io.requestor(idx).resp.valid := req_out_v(idx)
    io.requestor(idx).req.ready := io.requestor(idx).resp.ready // should always be true
    XSError(!io.requestor(idx).resp.ready, s"${q.name} port ${idx} is non-block, resp.ready must be true.B")

    val req_need_gpa = hasGpf(idx)
    val req_s2xlate = Wire(UInt(2.W))
    req_s2xlate := MuxCase(noS2xlate, Seq(
      (!(virt_out(idx) || req_out(idx).hyperinst)) -> noS2xlate,
      (csr.vsatp.mode =/= 0.U && csr.hgatp.mode =/= 0.U) -> allStage,
      (csr.vsatp.mode === 0.U) -> onlyStage2,
      (csr.hgatp.mode === 0.U) -> onlyStage1
    ))

    val ptw_just_back = ptw.resp.fire && req_s2xlate === ptw.resp.bits.s2xlate && ptw.resp.bits.hit(get_pn(req_out(idx).vaddr), csr.satp.asid, csr.vsatp.asid, csr.hgatp.vmid, true, false)
    // TODO: RegNext enable: ptw.resp.valid ? req.valid
    val ptw_resp_bits_reg = RegEnable(ptw.resp.bits, ptw.resp.valid)
    val ptw_already_back = GatedValidRegNext(ptw.resp.fire) && req_s2xlate === ptw_resp_bits_reg.s2xlate && ptw_resp_bits_reg.hit(get_pn(req_out(idx).vaddr), csr.satp.asid, csr.vsatp.asid, csr.hgatp.vmid, allType = true)
    val ptw_getGpa = req_need_gpa && hitVec(idx)
    val need_gpa_vpn_hit = need_gpa_vpn === get_pn(req_out(idx).vaddr)

    io.ptw.req(idx).valid := false.B
    io.tlbreplay(idx) := false.B

    when (req_out_v(idx) && missVec(idx)) {
      // NOTE: for a missed tlb request: either send a ptw request, or ask for a replay
      when (ptw_just_back || ptw_already_back) {
        io.tlbreplay(idx) := true.B
      } .elsewhen (need_gpa && !need_gpa_vpn_hit && !resp_gpa_refill) {
        // don't send any unrelated ptw request while the l1tlb is in the need_gpa state
        io.tlbreplay(idx) := true.B
      } .otherwise {
        io.ptw.req(idx).valid := true.B
      }
    }

    when (io.requestor(idx).req_kill && GatedValidRegNext(io.requestor(idx).req.fire)) {
      io.ptw.req(idx).valid := false.B
      io.tlbreplay(idx) := true.B
    }

    io.ptw.req(idx).bits.vpn := get_pn(req_out(idx).vaddr)
    io.ptw.req(idx).bits.s2xlate := req_s2xlate
    io.ptw.req(idx).bits.getGpa := ptw_getGpa
    io.ptw.req(idx).bits.memidx := req_out(idx).memidx
  }

  def handle_block(idx: Int): Unit = {
    // three valids: 1. whether an entry exists; 2. whether it was sent to the ptw; 3. unset resp.valid
    io.requestor(idx).req.ready := !req_out_v(idx) || io.requestor(idx).resp.fire
    // req_out_v tracks whether there is an in-flight request; may have long latency, FIXME

    // miss request entries
    val req_need_gpa = hasGpf(idx)
    val miss_req_vpn = get_pn(req_out(idx).vaddr)
    val miss_req_memidx = req_out(idx).memidx
    val miss_req_s2xlate = Wire(UInt(2.W))
    miss_req_s2xlate := MuxCase(noS2xlate, Seq(
      (!(virt_out(idx) || req_out(idx).hyperinst)) -> noS2xlate,
      (csr.vsatp.mode =/= 0.U && csr.hgatp.mode =/= 0.U) -> allStage,
      (csr.vsatp.mode === 0.U) -> onlyStage2,
      (csr.hgatp.mode === 0.U) -> onlyStage1
    ))
    val miss_req_s2xlate_reg = RegEnable(miss_req_s2xlate, io.ptw.req(idx).fire)
    val hasS2xlate = miss_req_s2xlate_reg =/= noS2xlate
    val onlyS2 = miss_req_s2xlate_reg === onlyStage2
    val hit_s1 = io.ptw.resp.bits.s1.hit(miss_req_vpn, Mux(hasS2xlate, csr.vsatp.asid, csr.satp.asid), csr.hgatp.vmid, allType = true, false, hasS2xlate)
    val hit_s2 = io.ptw.resp.bits.s2.hit(miss_req_vpn, csr.hgatp.vmid)
    val hit = Mux(onlyS2, hit_s2, hit_s1) && io.ptw.resp.valid && miss_req_s2xlate_reg === io.ptw.resp.bits.s2xlate

    val new_coming_valid = WireInit(false.B)
    new_coming_valid := req_in(idx).fire && !req_in(idx).bits.kill && !flush_pipe(idx)
    val new_coming = GatedValidRegNext(new_coming_valid)
    val miss_wire = new_coming && missVec(idx)
    val miss_v = ValidHoldBypass(miss_wire, resp(idx).fire, flush_pipe(idx))
    val miss_req_v = ValidHoldBypass(miss_wire || (miss_v && flush_mmu && !mmu_flush_pipe),
      io.ptw.req(idx).fire || resp(idx).fire, flush_pipe(idx))
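    // miss_v: a miss is outstanding, so resp.valid is held low below until the
    // ptw answer arrives. miss_req_v: a ptw request still has to be (re)sent;
    // it is re-armed when an mmu flush (but not a pipe flush) kills the
    // in-flight walk, and cleared once the request fires or the port responds.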

    // when the ptw responds, check whether it hits, reset miss_v, and respond to the lsu/ifu
    resp(idx).valid := req_out_v(idx) && !(miss_v && portTranslateEnable(idx))
    when (io.ptw.resp.fire && hit && req_out_v(idx) && portTranslateEnable(idx)) {
      val stage1 = io.ptw.resp.bits.s1
      val stage2 = io.ptw.resp.bits.s2
      val s2xlate = io.ptw.resp.bits.s2xlate
      resp(idx).valid := true.B
      resp(idx).bits.miss := false.B
      val s1_ppn = stage1.genPPN(get_pn(req_out(idx).vaddr))(ppnLen - 1, 0)
      val s2_ppn = stage2.genPPNS2(get_pn(req_out(idx).vaddr))(ppnLen - 1, 0)
      val s1_paddr = Cat(s1_ppn, get_off(req_out(idx).vaddr))
      val s2_paddr = Cat(s2_ppn, get_off(req_out(idx).vaddr))
      for (d <- 0 until nRespDups) {
        resp(idx).bits.paddr(d) := Mux(s2xlate === onlyStage2 || s2xlate === allStage, s2_paddr, s1_paddr)
        resp(idx).bits.gpaddr(d) := s1_paddr
        pbmt_check(idx, d, io.ptw.resp.bits.s1.entry.pbmt, io.ptw.resp.bits.s2.entry.pbmt, s2xlate)
        perm_check(stage1, req_out(idx).cmd, idx, d, stage2, req_out(idx).hlvx, s2xlate)
      }
      pmp_check(resp(idx).bits.paddr(0), req_out(idx).size, req_out(idx).cmd, false.B, idx)

      // NOTE: the unfiltered req would be handled by the Repeater
    }
    assert(RegNext(!resp(idx).valid || resp(idx).ready, true.B), "when tlb resp valid, ready should be true, must")
    assert(RegNext(req_out_v(idx) || !(miss_v || miss_req_v), true.B), "when not req_out_v, should not set miss_v/miss_req_v")

    val ptw_req = io.ptw.req(idx)
    ptw_req.valid := miss_req_v
    ptw_req.bits.vpn := miss_req_vpn
    ptw_req.bits.s2xlate := miss_req_s2xlate
    ptw_req.bits.getGpa := req_need_gpa && hitVec(idx)
    ptw_req.bits.memidx := miss_req_memidx

    io.tlbreplay(idx) := false.B

    // NOTE: when the pipe is flushed, the tlb should abandon its last req;
    // however, some outside modules, like the icache, don't care about flushPipe and keep waiting for the tlb resp.
    // Just assert resp.valid and raise a page fault so the request goes through; the pipe (ifu) will abandon it.
    if (!q.outsideRecvFlush) {
      when (req_out_v(idx) && flush_pipe(idx) && portTranslateEnable(idx)) {
        resp(idx).valid := true.B
        for (d <- 0 until nRespDups) {
          resp(idx).bits.pbmt(d) := 0.U
          resp(idx).bits.excp(d).pf.ld := true.B // sfence happened, raise pf so this addr won't be used
          resp(idx).bits.excp(d).pf.st := true.B
          resp(idx).bits.excp(d).pf.instr := true.B
        }
      }
    }
  }

  // when the ptw responds, the tlb entry at refill_idx may be set to miss by force.
  // Bypass the ptw resp to check.
  def ptw_resp_bypass(vpn: UInt, s2xlate: UInt) = {
    // TODO: RegNext enable: ptw.resp.valid
    val hasS2xlate = s2xlate =/= noS2xlate
    val onlyS2 = s2xlate === onlyStage2
    val onlyS1 = s2xlate === onlyStage1
    val s2xlate_hit = s2xlate === ptw.resp.bits.s2xlate
    val resp_hit = ptw.resp.bits.hit(vpn, csr.satp.asid, csr.vsatp.asid, csr.hgatp.vmid, true, false)
    val p_hit = GatedValidRegNext(resp_hit && io.ptw.resp.fire && s2xlate_hit)
    val ppn_s1 = ptw.resp.bits.s1.genPPN(vpn)(ppnLen - 1, 0)
    val gvpn = Mux(onlyS2, vpn, ppn_s1)
    val ppn_s2 = ptw.resp.bits.s2.genPPNS2(gvpn)(ppnLen - 1, 0)
    val p_ppn = RegEnable(Mux(s2xlate === onlyStage2 || s2xlate === allStage, ppn_s2, ppn_s1), io.ptw.resp.fire)
    val p_pbmt = RegEnable(ptw.resp.bits.s1.entry.pbmt, io.ptw.resp.fire)
    val p_perm = RegEnable(ptwresp_to_tlbperm(ptw.resp.bits.s1), io.ptw.resp.fire)
    val p_gvpn = RegEnable(Mux(onlyS2, ptw.resp.bits.s2.entry.tag, ptw.resp.bits.s1.genGVPN(vpn)), io.ptw.resp.fire)
    val p_g_pbmt = RegEnable(ptw.resp.bits.s2.entry.pbmt, io.ptw.resp.fire)
    val p_g_perm = RegEnable(hptwresp_to_tlbperm(ptw.resp.bits.s2), io.ptw.resp.fire)
    val p_s2xlate = RegEnable(ptw.resp.bits.s2xlate, io.ptw.resp.fire)
    val p_s1_level = RegEnable(ptw.resp.bits.s1.entry.level.get, io.ptw.resp.fire)
    val p_s1_isLeaf = RegEnable(ptw.resp.bits.s1.isLeaf(), io.ptw.resp.fire)
    val p_s1_isFakePte = RegEnable(ptw.resp.bits.s1.isFakePte(), io.ptw.resp.fire)
    (p_hit, p_ppn, p_pbmt, p_perm, p_gvpn, p_g_pbmt, p_g_perm, p_s2xlate, p_s1_level, p_s1_isLeaf, p_s1_isFakePte)
  }

  // perf event
  val result_ok = req_in.map(a => GatedValidRegNext(a.fire))
  val perfEvents =
    Seq(
      ("access", PopCount((0 until Width).map{i => if (Block(i)) io.requestor(i).req.fire else portTranslateEnable(i) && result_ok(i) })),
      ("miss  ", PopCount((0 until Width).map{i => if (Block(i)) portTranslateEnable(i) && result_ok(i) && missVec(i) else ptw.req(i).fire })),
    )
  generatePerfEvent()

  // perf log
  for (i <- 0 until Width) {
    if (Block(i)) {
      XSPerfAccumulate(s"access${i}", result_ok(i) && portTranslateEnable(i))
      XSPerfAccumulate(s"miss${i}", result_ok(i) && missVec(i))
    } else {
      XSPerfAccumulate("first_access" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i) && RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid))
      XSPerfAccumulate("access" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i))
      XSPerfAccumulate("first_miss" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i) && missVec(i) && RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid))
      XSPerfAccumulate("miss" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i) && missVec(i))
    }
  }
  XSPerfAccumulate("ptw_resp_count", ptw.resp.fire)
  XSPerfAccumulate("ptw_resp_pf_count", ptw.resp.fire && ptw.resp.bits.s1.pf)

  // Log
  for (i <- 0 until Width) {
    XSDebug(req(i).valid, p"req(${i.U}): (${req(i).valid} ${req(i).ready}) ${req(i).bits}\n")
    XSDebug(resp(i).valid, p"resp(${i.U}): (${resp(i).valid} ${resp(i).ready}) ${resp(i).bits}\n")
  }

  XSDebug(io.sfence.valid, p"Sfence: ${io.sfence}\n")
  XSDebug(ParallelOR(req_out_v) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)}\n")
  for (i <- ptw.req.indices) {
    XSDebug(ptw.req(i).fire, p"L2TLB req:${ptw.req(i).bits}\n")
  }
  XSDebug(ptw.resp.valid, p"L2TLB resp:${ptw.resp.bits} (v:${ptw.resp.valid} r:${ptw.resp.ready})\n")

  println(s"${q.name}: page: ${q.NWays} ${q.Associative} ${q.Replacer.get}")

  if (env.EnableDifftest) {
    for (i <- 0 until Width) {
      val pf = io.requestor(i).resp.bits.excp(0).pf.instr || io.requestor(i).resp.bits.excp(0).pf.st || io.requestor(i).resp.bits.excp(0).pf.ld
      val gpf = io.requestor(i).resp.bits.excp(0).gpf.instr || io.requestor(i).resp.bits.excp(0).gpf.st || io.requestor(i).resp.bits.excp(0).gpf.ld
      val af = io.requestor(i).resp.bits.excp(0).af.instr || io.requestor(i).resp.bits.excp(0).af.st || io.requestor(i).resp.bits.excp(0).af.ld
      val difftest = DifftestModule(new DiffL1TLBEvent)
      difftest.coreid := io.hartId
      difftest.valid := RegNext(io.requestor(i).req.fire) && !io.requestor(i).req_kill && io.requestor(i).resp.fire && !io.requestor(i).resp.bits.miss && !pf && !af && !gpf && portTranslateEnable(i)
      if (!Seq("itlb", "ldtlb", "sttlb").contains(q.name)) {
        difftest.valid := false.B
      }
      difftest.index := TLBDiffId(p(XSCoreParamsKey).HartId).U
      difftest.vpn := RegEnable(get_pn(req_in(i).bits.vaddr), req_in(i).valid)
      difftest.ppn := get_pn(io.requestor(i).resp.bits.paddr(0))
      difftest.satp := Cat(csr.satp.mode, csr.satp.asid, csr.satp.ppn)
      difftest.vsatp := Cat(csr.vsatp.mode, csr.vsatp.asid, csr.vsatp.ppn)
      difftest.hgatp := Cat(csr.hgatp.mode, csr.hgatp.vmid, csr.hgatp.ppn)
      val req_need_gpa = gpf
      val req_s2xlate = Wire(UInt(2.W))
      req_s2xlate := MuxCase(noS2xlate, Seq(
        (!RegNext(virt_in || req_in(i).bits.hyperinst)) -> noS2xlate,
        (csr.vsatp.mode =/= 0.U && csr.hgatp.mode =/= 0.U) -> allStage,
        (csr.vsatp.mode === 0.U) -> onlyStage2,
        (csr.hgatp.mode === 0.U) -> onlyStage1
      ))
      difftest.s2xlate := req_s2xlate
    }
  }
}

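// TLBDiffId hands out an elaboration-time (Scala, not hardware) index per
// hart, so that each TLB instance on a hart logs to its own difftest slot;
// the counter restarts whenever a new hartId shows up.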
object TLBDiffId {
  var i: Int = 0
  var lastHartId: Int = -1
  def apply(hartId: Int): Int = {
    if (lastHartId != hartId) {
      i = 0
      lastHartId = hartId
    }
    i += 1
    i - 1
  }
}


class TLBNonBlock(Width: Int, nRespDups: Int = 1, q: TLBParameters)(implicit p: Parameters) extends TLB(Width, nRespDups, Seq.fill(Width)(false), q)
class TLBBLock(Width: Int, nRespDups: Int = 1, q: TLBParameters)(implicit p: Parameters) extends TLB(Width, nRespDups, Seq.fill(Width)(true), q)

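// TlbReplace centralizes replacement-way selection when q.outReplace is set:
// a fully-associative config picks a way across NWays, while a set-associative
// config picks a way within the chosen set (always way 0 if direct-mapped).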
class TlbReplace(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModule {
  val io = IO(new TlbReplaceIO(Width, q))

  if (q.Associative == "fa") {
    val re = ReplacementPolicy.fromString(q.Replacer, q.NWays)
    re.access(io.page.access.map(_.touch_ways))
    io.page.refillIdx := re.way
  } else { // set-associative && plru
    val re = ReplacementPolicy.fromString(q.Replacer, q.NSets, q.NWays)
    re.access(io.page.access.map(_.sets), io.page.access.map(_.touch_ways))
    io.page.refillIdx := { if (q.NWays == 1) 0.U else re.way(io.page.chosen_set) }
  }
}