xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala (revision 99ce5576f0ecce1b5045b7bc0dbbb2debd934fbb)
10d32f713Shappy-lxpackage xiangshan.mem.prefetch
20d32f713Shappy-lx
38891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
40d32f713Shappy-lximport chisel3._
50d32f713Shappy-lximport chisel3.util._
69e12e8edScz4eimport freechips.rocketchip.util._
70d32f713Shappy-lximport utils._
80d32f713Shappy-lximport utility._
99e12e8edScz4eimport xiangshan._
1025a80bceSYanqin Liimport xiangshan.backend.fu.PMPRespBundle
119e12e8edScz4eimport xiangshan.mem.L1PrefetchReq
12*99ce5576Scz4eimport xiangshan.mem.Bundles.LsPrefetchTrainBundle
130d32f713Shappy-lximport xiangshan.mem.trace._
140d32f713Shappy-lximport xiangshan.mem.L1PrefetchSource
159e12e8edScz4eimport xiangshan.cache.HasDCacheParameters
169e12e8edScz4eimport xiangshan.cache.mmu._
170d32f713Shappy-lx
/**
 * Constants and address-slicing helpers shared by the L1 prefetch components.
 *
 * A virtual address is viewed as:
 *   | region tag | region bits | block offset |
 * where `region bits` selects one cache block inside a REGION_SIZE-byte region.
 */
trait HasL1PrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // ---- region geometry ----
  val REGION_SIZE = 1024
  val PAGE_OFFSET = 12
  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  // number of cache blocks per region == width of the per-region bit vector
  val BIT_VEC_WITDH = REGION_SIZE / dcacheParameters.blockBytes
  val REGION_BITS = log2Up(BIT_VEC_WITDH)
  val REGION_TAG_OFFSET = BLOCK_OFFSET + REGION_BITS
  val REGION_TAG_BITS = VAddrBits - BLOCK_OFFSET - REGION_BITS

  // ---- hash tag layout: | folded high part (VADDR_HASH_WIDTH) | raw low part (BLK_ADDR_RAW_WIDTH) | ----
  val VADDR_HASH_WIDTH = 5
  val BLK_ADDR_RAW_WIDTH = 10
  val HASH_TAG_WIDTH = VADDR_HASH_WIDTH + BLK_ADDR_RAW_WIDTH

  // ---- filter capacity ----
  val MLP_SIZE = 32
  val MLP_L1_SIZE = 16
  val MLP_L2L3_SIZE = MLP_SIZE - MLP_L1_SIZE

  // ---- prefetch sink encoding ----
  val SINK_BITS = 2
  def SINK_L1 = "b00".U
  def SINK_L2 = "b01".U
  def SINK_L3 = "b10".U

  /** Region tag: every bit of `vaddr` above the region bits. */
  def get_region_tag(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    val msb = vaddr.getWidth - 1
    vaddr(msb, REGION_TAG_OFFSET)
  }

  /** Region bits: the index of the cache block inside its region. */
  def get_region_bits(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(REGION_TAG_OFFSET - 1, BLOCK_OFFSET)
  }

  /** Drops the in-block offset bits of `x`. */
  def block_addr(x: UInt): UInt = {
    val msb = x.getWidth - 1
    x(msb, BLOCK_OFFSET)
  }

  /** XOR-folds the low 3 * VADDR_HASH_WIDTH bits of `x` into VADDR_HASH_WIDTH bits. */
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val chunks = Seq.tabulate(3)(k => x((k + 1) * width - 1, k * width))
    chunks.reduce(_ ^ _)
  }

  // Common body of the *_hash_tag functions: the low BLK_ADDR_RAW_WIDTH bits are kept
  // verbatim, the next 3 * VADDR_HASH_WIDTH bits are folded by vaddr_hash.
  private def fold_hash_tag(bits: UInt): UInt = {
    val raw_part = bits(BLK_ADDR_RAW_WIDTH - 1, 0)
    val folded_src = bits(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    Cat(vaddr_hash(folded_src), raw_part)
  }

  /** HASH_TAG_WIDTH-bit tag computed directly from `x`. */
  def pc_hash_tag(x: UInt): UInt = fold_hash_tag(x)

  /** HASH_TAG_WIDTH-bit tag of the block address of `x` (offset bits removed first). */
  def block_hash_tag(x: UInt): UInt = fold_hash_tag(block_addr(x))

  /** HASH_TAG_WIDTH-bit tag of a region tag. */
  def region_hash_tag(region_tag: UInt): UInt = fold_hash_tag(region_tag)

  /** Concatenates region tag and region bits back into a block address. */
  def region_to_block_addr(region_tag: UInt, region_bits: UInt): UInt = {
    Cat(region_tag, region_bits)
  }

  /** One-hot encoding of the block index (within its region) of the PAddrBits-wide `x`. */
  def get_candidate_oh(x: UInt): UInt = {
    require(x.getWidth == PAddrBits)
    val block_index = x(REGION_BITS + BLOCK_OFFSET - 1, BLOCK_OFFSET)
    UIntToOH(block_index)
  }

  /** Binary digits of `n` without leading zeros (used to build perf-counter names). */
  def toBinary(n: Int): String =
    if (n == 0 || n == 1) n.toString
    else toBinary(n / 2) + (n % 2)
}
1030d32f713Shappy-lx
/** Helper for ordering same-cycle training requests by ROB age. */
trait HasTrainFilterHelper extends HasCircularQueuePtrHelper {
  /**
   * Orders up to three training requests so that older ones (by `robIdx`) come first.
   * Two entries are only exchanged when both are valid.
   *
   *  - 1 input : returned unchanged.
   *  - 2 inputs: combinational compare-and-swap.
   *  - 3 inputs: three compare-and-swap stages (0/1, 1/2, 0/1); the result of each
   *              stage is held in a register, so the output is not combinational.
   *  - more    : rejected at elaboration time.
   */
  def reorder[T <: LsPrefetchTrainBundle](source: Vec[ValidIO[T]]): Vec[ValidIO[T]] = {
    source.length match {
      case 1 =>
        source

      case 2 =>
        val res = Wire(source.cloneType)
        // swap only when both are valid and source 1 is older than source 0
        val both_valid = source(0).valid && source(1).valid
        val source_1_older = both_valid &&
          isBefore(source(1).bits.uop.robIdx, source(0).bits.uop.robIdx)

        res := source
        when(source_1_older) {
          res(0) := source(1)
          res(1) := source(0)
        }
        res

      case 3 =>
        // TODO: generalize
        val stage_a = Reg(source.cloneType) // after comparing inputs 0 and 1
        val stage_b = Reg(source.cloneType) // after comparing positions 1 and 2
        val stage_c = Reg(source.cloneType) // after comparing positions 0 and 1 again

        val first_pair = reorder(VecInit(source.take(2)))
        stage_a(0) := first_pair(0)
        stage_a(1) := first_pair(1)
        stage_a(2) := source(2)

        val second_pair = reorder(VecInit(stage_a.drop(1)))
        stage_b(0) := stage_a(0)
        stage_b(1) := second_pair(0)
        stage_b(2) := second_pair(1)

        val third_pair = reorder(VecInit(stage_b.take(2)))
        stage_c(0) := third_pair(0)
        stage_c(1) := third_pair(1)
        stage_c(2) := stage_b(2)

        stage_c

      case _ =>
        require(false, "for now, 4 or more sources are invalid")
        source
    }
  }
}
1500d32f713Shappy-lx
// get prefetch train reqs from `backendParams.LduCnt` load pipelines (up to `backendParams.LduCnt`/cycle)
// filter by cache line address, send out train req to stride (up to 1 req/cycle)
/**
 * Small circular queue that de-duplicates load training requests.
 *
 * Each cycle the (age-ordered) inputs are compared, by hashed block address, against
 * the buffered entries and against older inputs of the same cycle; only requests that
 * match neither are enqueued. One entry per cycle is offered on `train_req`.
 *
 * @param size number of queue entries (must be at least the number of load inputs)
 * @param name prefix used for the perf-counter names
 */
class TrainFilter(size: Int, name: String)(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    val enable = Input(Bool())
    val flush = Input(Bool())
    // train input, only from load for now
    val ld_in = Flipped(Vec(backendParams.LduCnt, ValidIO(new LsPrefetchTrainBundle())))
    // filter out
    val train_req = DecoupledIO(new PrefetchReqBundle())
  })

  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr](p => size) {}
  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  // payload is not reset; only the valid bits are
  val entries = Reg(Vec(size, new PrefetchReqBundle))
  val valids = RegInit(VecInit(Seq.fill(size)(false.B)))

  // enq: enqPtrExt(k) always points k slots past the first free slot
  val enqLen = backendParams.LduCnt
  val enqPtrExt = RegInit(VecInit(Seq.tabulate(enqLen)(k => k.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(size >= enqLen)

  val ld_in_reordered = reorder(io.ld_in)
  val reqs_l = ld_in_reordered.map(_.bits.toPrefetchReqBundle())
  val reqs_vl = ld_in_reordered.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for (i <- 0 until enqLen) {
    val req = reqs_l(i)
    val req_tag = block_hash_tag(req.vaddr)
    // slot offset = how many older inputs of this cycle also need an entry
    val allocPtr = enqPtrExt(PopCount(needAlloc.take(i)))

    // already buffered?
    val entry_match = entries.zip(valids).map {
      case (e, v) => v && block_hash_tag(e.vaddr) === req_tag
    }.reduce(_ || _)
    // same block as an older valid input of this cycle?
    val prev_enq_match = reqs_l.zip(reqs_vl).take(i).map {
      case (pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === req_tag
    }.foldLeft(false.B)(_ || _)

    needAlloc(i) := reqs_vl(i) && !(entry_match || prev_enq_match)
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt && io.enable

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  when(canAlloc.asUInt.orR) {
    enqPtrExt.foreach(ptr => ptr := ptr + allocNum)
  }

  // deq: present the entry selected by deqPtr
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  for (i <- 0 until size) {
    when(deqPtr === i.U) {
      io.train_req.valid := valids(i) && io.enable
      io.train_req.bits := entries(i)
    }
  }

  when(io.train_req.fire) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  // flush acts one cycle after io.flush; being the last connection it overrides
  // any enqueue/dequeue update made above in the same cycle
  when(RegNext(io.flush)) {
    valids.foreach(_ := false.B)
    enqPtrExt.zipWithIndex.foreach { case (ptr, k) => ptr := k.U.asTypeOf(new Ptr) }
    deqPtrExt := 0.U.asTypeOf(new Ptr)
  }

  XSPerfAccumulate(s"${name}_train_filter_full", PopCount(valids) === size.U)
  XSPerfAccumulate(s"${name}_train_filter_half", PopCount(valids) >= (size / 2).U)
  XSPerfAccumulate(s"${name}_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vl)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate(s"${name}_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate(s"${name}_train_filter_deq", io.train_req.fire)
  // one counter per possible valid/need/can pattern of the enqueue ports
  (0 until (1 << enqLen)).foreach { pattern =>
    XSPerfAccumulate(s"${name}_train_filter_raw_enq_pattern_${toBinary(pattern)}", raw_enq_pattern === pattern.U)
    XSPerfAccumulate(s"${name}_train_filter_filtered_enq_pattern_${toBinary(pattern)}", filtered_enq_pattern === pattern.U)
    XSPerfAccumulate(s"${name}_train_filter_actual_enq_pattern_${toBinary(pattern)}", actual_enq_pattern === pattern.U)
  }
}
2530d32f713Shappy-lx
/**
 * One entry of the multi-level prefetch filter: a region plus the set of blocks in it
 * that were requested (`bit_vec`) and the set already issued (`sent_vec`).
 *
 * The entry's valid bit is kept outside the bundle and passed into the query methods.
 */
class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1PrefetchHelper {
  // hashed region tag used for lookup
  val tag = UInt(HASH_TAG_WIDTH.W)
  // region tag used to build the prefetch / TLB address
  // NOTE(review): presumably overwritten with the translated region once the TLB
  // responds (is_vaddr then cleared) - that logic is outside this bundle, confirm there
  val region = UInt(REGION_TAG_BITS.W)
  // one bit per block of the region that should be prefetched
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  // NOTE: l1 will not use sent_vec, for making more prefetch reqs to l1 dcache
  val sent_vec = UInt(BIT_VEC_WITDH.W)
  // target level: SINK_L1 / SINK_L2 / SINK_L3
  val sink = UInt(SINK_BITS.W)
  val alias = UInt(2.W)
  // set when the entry is created from a stream request; prefetches are held back while set
  val is_vaddr = Bool()
  val source = new L1PrefetchSource()
  // copy of the region taken at creation time, used only to rebuild a debug address
  val debug_va_region = UInt(REGION_TAG_BITS.W)

  /** Drives every field to its idle value, using `index` as a placeholder region. */
  def reset(index: Int) = {
    // Give the literal the full region-tag width so that region_hash_tag slices a
    // value of the width it is written for, however small `index` is.
    val index_region = index.U(REGION_TAG_BITS.W)
    tag := region_hash_tag(index_region)
    region := index_region
    bit_vec := 0.U
    sent_vec := 0.U
    sink := SINK_L1
    alias := 0.U
    is_vaddr := false.B
    source.value := L1_HW_PREFETCH_NULL
    debug_va_region := 0.U
  }

  /** True when both valid flags are set and `new_tag` equals this entry's hashed tag. */
  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
    require(new_tag.getWidth == HASH_TAG_WIDTH)
    (tag === new_tag) && valid1 && valid2
  }

  /**
   * Merges a new request for the same region into this entry.
   * If the new sink encoding is smaller than the stored one, the blocks already
   * recorded in `sent_vec` are dropped from `bit_vec` and the sink is replaced.
   */
  def update(update_bit_vec: UInt, update_sink: UInt) = {
    bit_vec := bit_vec | update_bit_vec
    when(update_sink < sink) {
      bit_vec := (bit_vec & ~sent_vec) | update_bit_vec
      sink := update_sink
    }

    assert(PopCount(update_bit_vec) >= 1.U, "update vector should contain at least one valid bit")
  }

  // Blocks still eligible to be issued: for the L1 sink every requested block
  // (sent_vec is ignored there), otherwise only the ones not yet sent.
  private def pending_vec(): UInt = Mux(sink === SINK_L1, bit_vec, bit_vec & ~sent_vec)

  // Index of the lowest pending block inside the region.
  private def pf_candidate(): UInt = PriorityEncoder(pending_vec()).asTypeOf(UInt(REGION_BITS.W))

  /** True when the entry is valid, no longer marked `is_vaddr`, and has a pending block. */
  def can_send_pf(valid: Bool): Bool = {
    !is_vaddr && pending_vec().orR && valid
  }

  def may_be_replace(valid: Bool): Bool = {
    // either invalid or has sent out all reqs out
    !valid || RegNext(PopCount(sent_vec) === BIT_VEC_WITDH.U)
  }

  /** Block-aligned address of the next pending block, built from `region`. */
  def get_pf_addr(): UInt = {
    require(PAddrBits <= VAddrBits)
    require((region.getWidth + REGION_BITS + BLOCK_OFFSET) == VAddrBits)

    Cat(region, pf_candidate(), 0.U(BLOCK_OFFSET.W))
  }

  /** Same block as get_pf_addr, but built from the region saved in `debug_va_region`. */
  def get_pf_debug_vaddr(): UInt = {
    Cat(debug_va_region, pf_candidate(), 0.U(BLOCK_OFFSET.W))
  }

  /** Region-aligned address (all region/offset bits zero) used for the TLB request. */
  def get_tlb_va(): UInt = {
    require((region.getWidth + REGION_TAG_OFFSET) == VAddrBits)
    Cat(region, 0.U(REGION_TAG_OFFSET.W))
  }

  /** Builds a fresh entry (nothing sent yet, `is_vaddr` set) from a stream prefetch request. */
  def fromStreamPrefetchReqBundle(x : StreamPrefetchReqBundle): MLPReqFilterBundle = {
    require(PAGE_OFFSET >= REGION_TAG_OFFSET, "region is greater than 4k, alias bit may be incorrect")

    val res = Wire(new MLPReqFilterBundle)
    res.tag := region_hash_tag(x.region)
    res.region := x.region
    res.bit_vec := x.bit_vec
    res.sent_vec := 0.U
    res.sink := x.sink
    res.is_vaddr := true.B
    res.source := x.source
    // the two address bits directly above the page offset, expressed relative to the region tag
    res.alias := x.region(PAGE_OFFSET - REGION_TAG_OFFSET + 1, PAGE_OFFSET - REGION_TAG_OFFSET)
    res.debug_va_region := x.region

    res
  }

  /** Makes the entry inert: no more prefetch requests and no more TLB requests. */
  def invalidate() = {
    // disable sending pf req
    when(sink === SINK_L1) {
      bit_vec := 0.U(BIT_VEC_WITDH.W)
    }.otherwise {
      sent_vec := ~(0.U(BIT_VEC_WITDH.W))
    }
    // disable sending tlb req
    is_vaddr := false.B
  }
}
3600d32f713Shappy-lx
3610d32f713Shappy-lx// there are 5 independent pipelines inside
3620d32f713Shappy-lx// 1. prefetch enqueue
3630d32f713Shappy-lx// 2. tlb request
3640d32f713Shappy-lx// 3. actual l1 prefetch
3650d32f713Shappy-lx// 4. actual l2 prefetch
3660d32f713Shappy-lx// 5. actual l3 prefetch
3670d32f713Shappy-lxclass MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper {
3680d32f713Shappy-lx  val io = IO(new XSBundle {
3690d32f713Shappy-lx    val enable = Input(Bool())
3700d32f713Shappy-lx    val flush = Input(Bool())
37120e09ab1Shappy-lx    val l1_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
37220e09ab1Shappy-lx    val l2_l3_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
3730d32f713Shappy-lx    val tlb_req = new TlbRequestIO(nRespDups = 2)
37425a80bceSYanqin Li    val pmp_resp = Flipped(new PMPRespBundle())
3750d32f713Shappy-lx    val l1_req = DecoupledIO(new L1PrefetchReq())
3760d32f713Shappy-lx    val l2_pf_addr = ValidIO(new L2PrefetchReq())
3770d32f713Shappy-lx    val l3_pf_addr = ValidIO(UInt(PAddrBits.W)) // TODO: l3 pf source
3780d32f713Shappy-lx    val confidence = Input(UInt(1.W))
3790d32f713Shappy-lx    val l2PfqBusy = Input(Bool())
3800d32f713Shappy-lx  })
3810d32f713Shappy-lx
38220e09ab1Shappy-lx  val l1_array = Reg(Vec(MLP_L1_SIZE, new MLPReqFilterBundle))
38320e09ab1Shappy-lx  val l2_array = Reg(Vec(MLP_L2L3_SIZE, new MLPReqFilterBundle))
38470eea123SYanqin Li  val l1_valids = RegInit(VecInit(Seq.fill(MLP_L1_SIZE)(false.B)))
38570eea123SYanqin Li  val l2_valids = RegInit(VecInit(Seq.fill(MLP_L2L3_SIZE)(false.B)))
38670eea123SYanqin Li
38770eea123SYanqin Li  def _invalid(e: MLPReqFilterBundle, v: Bool): Unit = {
38870eea123SYanqin Li    v := false.B
38970eea123SYanqin Li    e.invalidate()
39070eea123SYanqin Li  }
39170eea123SYanqin Li
39270eea123SYanqin Li  def invalid_array(i: UInt, isL2: Boolean): Unit = {
39370eea123SYanqin Li    if (isL2) {
39470eea123SYanqin Li      _invalid(l2_array(i), l2_valids(i))
39570eea123SYanqin Li    } else {
39670eea123SYanqin Li      _invalid(l1_array(i), l1_valids(i))
39770eea123SYanqin Li    }
39870eea123SYanqin Li  }
39970eea123SYanqin Li
40070eea123SYanqin Li  def _reset(e: MLPReqFilterBundle, v: Bool, idx: Int): Unit = {
40170eea123SYanqin Li    v := false.B
40270eea123SYanqin Li    //only need to reset control signals for firendly area
40370eea123SYanqin Li    // e.reset(idx)
40470eea123SYanqin Li  }
40570eea123SYanqin Li
40670eea123SYanqin Li
40770eea123SYanqin Li  def reset_array(i: Int, isL2: Boolean): Unit = {
40870eea123SYanqin Li    if(isL2){
40970eea123SYanqin Li      _reset(l2_array(i), l2_valids(i), i)
41070eea123SYanqin Li    }else{
41170eea123SYanqin Li      _reset(l1_array(i), l1_valids(i), i)
41270eea123SYanqin Li    }
41370eea123SYanqin Li  }
41470eea123SYanqin Li
41520e09ab1Shappy-lx  val l1_replacement = new ValidPseudoLRU(MLP_L1_SIZE)
41620e09ab1Shappy-lx  val l2_replacement = new ValidPseudoLRU(MLP_L2L3_SIZE)
4170d32f713Shappy-lx  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, MLP_SIZE))
41820e09ab1Shappy-lx  val l1_pf_req_arb = Module(new RRArbiterInit(new Bundle {
41920e09ab1Shappy-lx    val req = new L1PrefetchReq
42020e09ab1Shappy-lx    val debug_vaddr = UInt(VAddrBits.W)
42120e09ab1Shappy-lx  }, MLP_L1_SIZE))
42220e09ab1Shappy-lx  val l2_pf_req_arb = Module(new RRArbiterInit(new Bundle {
42320e09ab1Shappy-lx    val req = new L2PrefetchReq
42420e09ab1Shappy-lx    val debug_vaddr = UInt(VAddrBits.W)
42520e09ab1Shappy-lx  }, MLP_L2L3_SIZE))
42620e09ab1Shappy-lx  val l3_pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), MLP_L2L3_SIZE))
4270d32f713Shappy-lx
42870eea123SYanqin Li  val l1_opt_replace_vec = VecInit(l1_array.zip(l1_valids).map{case (e, v) => e.may_be_replace(v)})
42970eea123SYanqin Li  val l2_opt_replace_vec = VecInit(l2_array.zip(l2_valids).map{case (e, v) => e.may_be_replace(v)})
43020e09ab1Shappy-lx  // if we have something to replace, then choose it, otherwise follow the plru manner
43120e09ab1Shappy-lx  val l1_real_replace_vec = Mux(Cat(l1_opt_replace_vec).orR, l1_opt_replace_vec, VecInit(Seq.fill(MLP_L1_SIZE)(true.B)))
43220e09ab1Shappy-lx  val l2_real_replace_vec = Mux(Cat(l2_opt_replace_vec).orR, l2_opt_replace_vec, VecInit(Seq.fill(MLP_L2L3_SIZE)(true.B)))
43320e09ab1Shappy-lx
43420e09ab1Shappy-lx  // l1 pf req enq
4350d32f713Shappy-lx  // s0: hash tag match
43620e09ab1Shappy-lx  val s0_l1_can_accept = Wire(Bool())
43720e09ab1Shappy-lx  val s0_l1_valid = io.l1_prefetch_req.valid && s0_l1_can_accept
43820e09ab1Shappy-lx  val s0_l1_region = io.l1_prefetch_req.bits.region
43920e09ab1Shappy-lx  val s0_l1_region_hash = region_hash_tag(s0_l1_region)
44070eea123SYanqin Li  val s0_l1_match_vec = l1_array.zip(l1_valids).map{ case (e, v) => e.tag_match(v, s0_l1_valid, s0_l1_region_hash)}
44120e09ab1Shappy-lx  val s0_l1_hit = VecInit(s0_l1_match_vec).asUInt.orR
44220e09ab1Shappy-lx  val s0_l1_index = Wire(UInt(log2Up(MLP_L1_SIZE).W))
44320e09ab1Shappy-lx  val s0_l1_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l1_prefetch_req.bits)
4440d32f713Shappy-lx
44520e09ab1Shappy-lx  s0_l1_index := Mux(s0_l1_hit, OHToUInt(VecInit(s0_l1_match_vec).asUInt), l1_replacement.way(l1_real_replace_vec.reverse)._2)
44620e09ab1Shappy-lx
44720e09ab1Shappy-lx  when(s0_l1_valid) {
44820e09ab1Shappy-lx    l1_replacement.access(s0_l1_index)
4490d32f713Shappy-lx  }
4500d32f713Shappy-lx
45120e09ab1Shappy-lx  assert(!s0_l1_valid || PopCount(VecInit(s0_l1_match_vec)) <= 1.U, "req region should match no more than 1 entry")
4520d32f713Shappy-lx
45320e09ab1Shappy-lx  XSPerfAccumulate("s0_l1_enq_fire", s0_l1_valid)
45420e09ab1Shappy-lx  XSPerfAccumulate("s0_l1_enq_valid", io.l1_prefetch_req.valid)
45520e09ab1Shappy-lx  XSPerfAccumulate("s0_l1_cannot_enq", io.l1_prefetch_req.valid && !s0_l1_can_accept)
4560d32f713Shappy-lx
4570d32f713Shappy-lx  // s1: alloc or update
45820e09ab1Shappy-lx  val s1_l1_valid = RegNext(s0_l1_valid)
45920e09ab1Shappy-lx  val s1_l1_region = RegEnable(s0_l1_region, s0_l1_valid)
46020e09ab1Shappy-lx  val s1_l1_region_hash = RegEnable(s0_l1_region_hash, s0_l1_valid)
46120e09ab1Shappy-lx  val s1_l1_hit = RegEnable(s0_l1_hit, s0_l1_valid)
46220e09ab1Shappy-lx  val s1_l1_index = RegEnable(s0_l1_index, s0_l1_valid)
46320e09ab1Shappy-lx  val s1_l1_prefetch_req = RegEnable(s0_l1_prefetch_req, s0_l1_valid)
46420e09ab1Shappy-lx  val s1_l1_alloc = s1_l1_valid && !s1_l1_hit
46520e09ab1Shappy-lx  val s1_l1_update = s1_l1_valid && s1_l1_hit
46620e09ab1Shappy-lx  s0_l1_can_accept := !(s1_l1_valid && s1_l1_alloc && (s0_l1_region_hash === s1_l1_region_hash))
4670d32f713Shappy-lx
46820e09ab1Shappy-lx  when(s1_l1_alloc) {
46970eea123SYanqin Li    l1_valids(s1_l1_index) := true.B
47020e09ab1Shappy-lx    l1_array(s1_l1_index) := s1_l1_prefetch_req
47120e09ab1Shappy-lx  }.elsewhen(s1_l1_update) {
47220e09ab1Shappy-lx    l1_array(s1_l1_index).update(
47320e09ab1Shappy-lx      update_bit_vec = s1_l1_prefetch_req.bit_vec,
47420e09ab1Shappy-lx      update_sink = s1_l1_prefetch_req.sink
4750d32f713Shappy-lx    )
4760d32f713Shappy-lx  }
4770d32f713Shappy-lx
47820e09ab1Shappy-lx  XSPerfAccumulate("s1_l1_enq_valid", s1_l1_valid)
47920e09ab1Shappy-lx  XSPerfAccumulate("s1_l1_enq_alloc", s1_l1_alloc)
48020e09ab1Shappy-lx  XSPerfAccumulate("s1_l1_enq_update", s1_l1_update)
48120e09ab1Shappy-lx  XSPerfAccumulate("l1_hash_conflict", s0_l1_valid && RegNext(s1_l1_valid) && (s0_l1_region =/= RegNext(s1_l1_region)) && (s0_l1_region_hash === RegNext(s1_l1_region_hash)))
48270eea123SYanqin Li  XSPerfAccumulate("s1_l1_enq_evict_useful_entry", s1_l1_alloc && l1_array(s1_l1_index).can_send_pf(l1_valids(s1_l1_index)))
4830d32f713Shappy-lx
48420e09ab1Shappy-lx  // l2 l3 pf req enq
48520e09ab1Shappy-lx  // s0: hash tag match
48620e09ab1Shappy-lx  val s0_l2_can_accept = Wire(Bool())
48720e09ab1Shappy-lx  val s0_l2_valid = io.l2_l3_prefetch_req.valid && s0_l2_can_accept
48820e09ab1Shappy-lx  val s0_l2_region = io.l2_l3_prefetch_req.bits.region
48920e09ab1Shappy-lx  val s0_l2_region_hash = region_hash_tag(s0_l2_region)
49070eea123SYanqin Li  val s0_l2_match_vec = l2_array.zip(l2_valids).map{ case (e, v) => e.tag_match(v, s0_l2_valid, s0_l2_region_hash) }
49120e09ab1Shappy-lx  val s0_l2_hit = VecInit(s0_l2_match_vec).asUInt.orR
49220e09ab1Shappy-lx  val s0_l2_index = Wire(UInt(log2Up(MLP_L2L3_SIZE).W))
49320e09ab1Shappy-lx  val s0_l2_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l2_l3_prefetch_req.bits)
49420e09ab1Shappy-lx
49520e09ab1Shappy-lx  s0_l2_index := Mux(s0_l2_hit, OHToUInt(VecInit(s0_l2_match_vec).asUInt), l2_replacement.way(l2_real_replace_vec.reverse)._2)
49620e09ab1Shappy-lx
49720e09ab1Shappy-lx  when(s0_l2_valid) {
49820e09ab1Shappy-lx    l2_replacement.access(s0_l2_index)
49920e09ab1Shappy-lx  }
50020e09ab1Shappy-lx
50120e09ab1Shappy-lx  assert(!s0_l2_valid || PopCount(VecInit(s0_l2_match_vec)) <= 1.U, "req region should match no more than 1 entry")
50220e09ab1Shappy-lx
50320e09ab1Shappy-lx  XSPerfAccumulate("s0_l2_enq_fire", s0_l2_valid)
50420e09ab1Shappy-lx  XSPerfAccumulate("s0_l2_enq_valid", io.l2_l3_prefetch_req.valid)
50520e09ab1Shappy-lx  XSPerfAccumulate("s0_l2_cannot_enq", io.l2_l3_prefetch_req.valid && !s0_l2_can_accept)
50620e09ab1Shappy-lx
50720e09ab1Shappy-lx  // s1: alloc or update
50820e09ab1Shappy-lx  val s1_l2_valid = RegNext(s0_l2_valid)
50920e09ab1Shappy-lx  val s1_l2_region = RegEnable(s0_l2_region, s0_l2_valid)
51020e09ab1Shappy-lx  val s1_l2_region_hash = RegEnable(s0_l2_region_hash, s0_l2_valid)
51120e09ab1Shappy-lx  val s1_l2_hit = RegEnable(s0_l2_hit, s0_l2_valid)
51220e09ab1Shappy-lx  val s1_l2_index = RegEnable(s0_l2_index, s0_l2_valid)
51320e09ab1Shappy-lx  val s1_l2_prefetch_req = RegEnable(s0_l2_prefetch_req, s0_l2_valid)
51420e09ab1Shappy-lx  val s1_l2_alloc = s1_l2_valid && !s1_l2_hit
51520e09ab1Shappy-lx  val s1_l2_update = s1_l2_valid && s1_l2_hit
51620e09ab1Shappy-lx  s0_l2_can_accept := !(s1_l2_valid && s1_l2_alloc && (s0_l2_region_hash === s1_l2_region_hash))
51720e09ab1Shappy-lx
51820e09ab1Shappy-lx  when(s1_l2_alloc) {
51970eea123SYanqin Li    l2_valids(s1_l2_index) := true.B
52020e09ab1Shappy-lx    l2_array(s1_l2_index) := s1_l2_prefetch_req
52120e09ab1Shappy-lx  }.elsewhen(s1_l2_update) {
52220e09ab1Shappy-lx    l2_array(s1_l2_index).update(
52320e09ab1Shappy-lx      update_bit_vec = s1_l2_prefetch_req.bit_vec,
52420e09ab1Shappy-lx      update_sink = s1_l2_prefetch_req.sink
52520e09ab1Shappy-lx    )
52620e09ab1Shappy-lx  }
52720e09ab1Shappy-lx
52820e09ab1Shappy-lx  XSPerfAccumulate("s1_l2_enq_valid", s1_l2_valid)
52920e09ab1Shappy-lx  XSPerfAccumulate("s1_l2_enq_alloc", s1_l2_alloc)
53020e09ab1Shappy-lx  XSPerfAccumulate("s1_l2_enq_update", s1_l2_update)
53120e09ab1Shappy-lx  XSPerfAccumulate("l2_hash_conflict", s0_l2_valid && RegNext(s1_l2_valid) && (s0_l2_region =/= RegNext(s1_l2_region)) && (s0_l2_region_hash === RegNext(s1_l2_region_hash)))
53270eea123SYanqin Li  XSPerfAccumulate("s1_l2_enq_evict_useful_entry", s1_l2_alloc && l2_array(s1_l2_index).can_send_pf(l2_valids(s1_l2_index)))
53320e09ab1Shappy-lx
53420e09ab1Shappy-lx  // stream pf debug db here
53520e09ab1Shappy-lx  // Hit:
53620e09ab1Shappy-lx  // now seens only pending = (region_bits & ~filter_bits) are the peeding request
53720e09ab1Shappy-lx  // if a PfGen comes, new added request can be new_req = PfGen.region_bits & ~(pending)
53820e09ab1Shappy-lx  // Alloc:
53920e09ab1Shappy-lx  // new_req = PfGen.region_bits
54020e09ab1Shappy-lx  val stream_pf_trace_debug_table = ChiselDB.createTable("StreamPFTrace" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceInEntry, basicDB = false)
  // Trace table for stream prefetch requests entering this filter; the table name
  // is suffixed with the hart id. The two loops below elaborate one log call per
  // bit position of the region bit vector, first for the L1 request port and then
  // for the L2/L3 request port. Both loops write to the same table and site.
54120e09ab1Shappy-lx  for (i <- 0 until BIT_VEC_WITDH) {
54220e09ab1Shappy-lx    // l1 enq log
54320e09ab1Shappy-lx    val hit_entry = l1_array(s0_l1_index)
    // On a hit, only bits that are not already set in the hit entry count as new;
    // otherwise the whole incoming bit_vec is new.
54420e09ab1Shappy-lx    val new_req = Mux(
54520e09ab1Shappy-lx      s0_l1_hit,
54620e09ab1Shappy-lx      io.l1_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
54720e09ab1Shappy-lx      io.l1_prefetch_req.bits.bit_vec
54820e09ab1Shappy-lx    )
    // Log bit i only for valid requests whose source is the stream prefetcher.
54920e09ab1Shappy-lx    val log_enable = s0_l1_valid && new_req(i) && (io.l1_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
55020e09ab1Shappy-lx    val log_data = Wire(new StreamPFTraceInEntry)
55120e09ab1Shappy-lx
55220e09ab1Shappy-lx    log_data.TriggerPC := io.l1_prefetch_req.bits.trigger_pc
55320e09ab1Shappy-lx    log_data.TriggerVaddr := io.l1_prefetch_req.bits.trigger_va
    // Address = s0_l1_region ## bit index i ## zeroed block offset.
55420e09ab1Shappy-lx    log_data.PFVaddr := Cat(s0_l1_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
    // NOTE(review): sink comes from s0_l1_prefetch_req (declared outside this view)
    // while the other fields read io.l1_prefetch_req.bits directly - presumably the
    // same request; confirm.
55520e09ab1Shappy-lx    log_data.PFSink := s0_l1_prefetch_req.sink
55620e09ab1Shappy-lx
55720e09ab1Shappy-lx    stream_pf_trace_debug_table.log(
55820e09ab1Shappy-lx      data = log_data,
55920e09ab1Shappy-lx      en = log_enable,
56020e09ab1Shappy-lx      site = "StreamPFTrace",
56120e09ab1Shappy-lx      clock = clock,
56220e09ab1Shappy-lx      reset = reset
56320e09ab1Shappy-lx    )
56420e09ab1Shappy-lx  }
56520e09ab1Shappy-lx  for (i <- 0 until BIT_VEC_WITDH) {
56620e09ab1Shappy-lx    // l2 l3 enq log
    // Same structure as the L1 loop above, driven by the L2/L3 request port and
    // the l2_array entry selected by s0_l2_index.
56720e09ab1Shappy-lx    val hit_entry = l2_array(s0_l2_index)
56820e09ab1Shappy-lx    val new_req = Mux(
56920e09ab1Shappy-lx      s0_l2_hit,
57020e09ab1Shappy-lx      io.l2_l3_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
57120e09ab1Shappy-lx      io.l2_l3_prefetch_req.bits.bit_vec
57220e09ab1Shappy-lx    )
57320e09ab1Shappy-lx    val log_enable = s0_l2_valid && new_req(i) && (io.l2_l3_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
57420e09ab1Shappy-lx    val log_data = Wire(new StreamPFTraceInEntry)
57520e09ab1Shappy-lx
57620e09ab1Shappy-lx    log_data.TriggerPC := io.l2_l3_prefetch_req.bits.trigger_pc
57720e09ab1Shappy-lx    log_data.TriggerVaddr := io.l2_l3_prefetch_req.bits.trigger_va
    // Address = s0_l2_region ## bit index i ## zeroed block offset.
57820e09ab1Shappy-lx    log_data.PFVaddr := Cat(s0_l2_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
57920e09ab1Shappy-lx    log_data.PFSink := s0_l2_prefetch_req.sink
58020e09ab1Shappy-lx
58120e09ab1Shappy-lx    stream_pf_trace_debug_table.log(
58220e09ab1Shappy-lx      data = log_data,
58320e09ab1Shappy-lx      en = log_enable,
58420e09ab1Shappy-lx      site = "StreamPFTrace",
58520e09ab1Shappy-lx      clock = clock,
58620e09ab1Shappy-lx      reset = reset
58720e09ab1Shappy-lx    )
58820e09ab1Shappy-lx  }
5890d32f713Shappy-lx
5900d32f713Shappy-lx  // tlb req
5910d32f713Shappy-lx  // s0: arb all tlb reqs
  // Every filter entry (L1 entries first, then L2/L3 entries) owns one input of
  // tlb_req_arb. s0_tlb_fire_vec records which input fired this cycle; the s1/s2/s3
  // copies are the same vector staged through GatedValidRegNext.
  // NOTE(review): GatedValidRegNext comes from the utility package - presumably one
  // register stage per call; confirm.
5920d32f713Shappy-lx  val s0_tlb_fire_vec = VecInit((0 until MLP_SIZE).map{case i => tlb_req_arb.io.in(i).fire})
5934ccb2e8bSYanqin Li  val s1_tlb_fire_vec = GatedValidRegNext(s0_tlb_fire_vec)
5944ccb2e8bSYanqin Li  val s2_tlb_fire_vec = GatedValidRegNext(s1_tlb_fire_vec)
59525a80bceSYanqin Li  val s3_tlb_fire_vec = GatedValidRegNext(s2_tlb_fire_vec)
  // not_tlbing_vec(i) is high when entry i has no request marked in the s1, s2 or
  // s3 fire vectors, i.e. no translation for it is tracked as in flight.
59625a80bceSYanqin Li  val not_tlbing_vec = VecInit((0 until MLP_SIZE).map{case i =>
59725a80bceSYanqin Li    !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !s3_tlb_fire_vec(i)
59825a80bceSYanqin Li  })
5990d32f713Shappy-lx
6000d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
    // An entry that is being (re)allocated in s1 must not request translation.
    // Global index i maps to l1_array(i) for i < MLP_L1_SIZE and to
    // l2_array(i - MLP_L1_SIZE) otherwise; only the matching evict flag is used.
60120e09ab1Shappy-lx    val l1_evict = s1_l1_alloc && (s1_l1_index === i.U)
60220e09ab1Shappy-lx    val l2_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === i.U)
60320e09ab1Shappy-lx    if(i < MLP_L1_SIZE) {
      // Request only while the entry is valid, still flagged is_vaddr, and has no
      // request tracked in the s1-s3 stages.
60425a80bceSYanqin Li      tlb_req_arb.io.in(i).valid := l1_valids(i) && l1_array(i).is_vaddr && not_tlbing_vec(i) && !l1_evict
60520e09ab1Shappy-lx      tlb_req_arb.io.in(i).bits.vaddr := l1_array(i).get_tlb_va()
60620e09ab1Shappy-lx    }else {
60725a80bceSYanqin Li      tlb_req_arb.io.in(i).valid := l2_valids(i - MLP_L1_SIZE) && l2_array(i - MLP_L1_SIZE).is_vaddr && not_tlbing_vec(i) && !l2_evict
60820e09ab1Shappy-lx      tlb_req_arb.io.in(i).bits.vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
60920e09ab1Shappy-lx    }
    // Request fields common to every entry: a read marked as a prefetch; the
    // fields not used by this requester are left as DontCare.
6100d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
6118a4dab4dSHaoyuan Feng    tlb_req_arb.io.in(i).bits.isPrefetch := true.B
6120d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.size := 3.U
6130d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.kill := false.B
6140d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.no_translate := false.B
615db6cfb5aSHaoyuan Feng    tlb_req_arb.io.in(i).bits.fullva := 0.U
616db6cfb5aSHaoyuan Feng    tlb_req_arb.io.in(i).bits.checkfullva := false.B
6170d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.memidx := DontCare
6180d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.debug := DontCare
6193d951cfaSpeixiaokun    tlb_req_arb.io.in(i).bits.hlvx := DontCare
6203d951cfaSpeixiaokun    tlb_req_arb.io.in(i).bits.hyperinst := DontCare
621149a2326Sweiding liu    tlb_req_arb.io.in(i).bits.pmp_addr  := DontCare
6220d32f713Shappy-lx  }
6230d32f713Shappy-lx
  // At most one arbiter input may fire per cycle; s1 relies on this when it
  // converts the fire vector to an index with OHToUInt.
6240d32f713Shappy-lx  assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")
6250d32f713Shappy-lx
6260d32f713Shappy-lx  // s1: send out the req
  // The arbiter winner is registered (payload and entry index captured only when
  // the arbiter output is valid) and then driven onto io.tlb_req.req.
6274ccb2e8bSYanqin Li  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.valid)
6280d32f713Shappy-lx  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid)
6290d32f713Shappy-lx  val s1_tlb_req_index = RegEnable(OHToUInt(s0_tlb_fire_vec.asUInt), tlb_req_arb.io.out.valid)
  // Suppress the request when the entry it belongs to is being allocated in s1.
  // L2/L3 entries are compared using their global index (offset by MLP_L1_SIZE).
63020e09ab1Shappy-lx  val s1_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s1_tlb_req_index)
63120e09ab1Shappy-lx  val s1_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s1_tlb_req_index)
63220e09ab1Shappy-lx  val s1_tlb_evict = s1_l1_tlb_evict || s1_l2_tlb_evict
6330d32f713Shappy-lx  io.tlb_req.req.valid := s1_tlb_req_valid && !s1_tlb_evict
6340d32f713Shappy-lx  io.tlb_req.req.bits := s1_tlb_req_bits
6350d32f713Shappy-lx  io.tlb_req.req_kill := false.B
  // The arbiter output is never back-pressured.
6360d32f713Shappy-lx  tlb_req_arb.io.out.ready := true.B
6370d32f713Shappy-lx
6380d32f713Shappy-lx  XSPerfAccumulate("s1_tlb_req_sent", io.tlb_req.req.valid)
6390d32f713Shappy-lx  XSPerfAccumulate("s1_tlb_req_evict", s1_tlb_req_valid && s1_tlb_evict)
6400d32f713Shappy-lx
6410d32f713Shappy-lx  // s2: get response from tlb
  // The response is read straight from io.tlb_req.resp; the index of the entry
  // that issued the request is carried along in a register.
64225a80bceSYanqin Li  val s2_tlb_resp_valid = io.tlb_req.resp.valid
64325a80bceSYanqin Li  val s2_tlb_resp = io.tlb_req.resp.bits
6440d32f713Shappy-lx  val s2_tlb_update_index = RegEnable(s1_tlb_req_index, s1_tlb_req_valid)
  // The response is discarded later if its entry is being allocated by the
  // enqueue pipeline (s1_l1_alloc / s1_l2_alloc) during this cycle.
64520e09ab1Shappy-lx  val s2_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s2_tlb_update_index)
64620e09ab1Shappy-lx  val s2_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s2_tlb_update_index)
64720e09ab1Shappy-lx  val s2_tlb_evict = s2_l1_tlb_evict || s2_l2_tlb_evict
6480d32f713Shappy-lx
64925a80bceSYanqin Li  // s3: get pmp response from PMPChecker
  // The TLB response, entry index and evict flag are registered from s2, while
  // the PMP response is read from io.pmp_resp in this cycle without a register.
65025a80bceSYanqin Li  val s3_tlb_resp_valid = RegNext(s2_tlb_resp_valid)
65125a80bceSYanqin Li  val s3_tlb_resp = RegEnable(s2_tlb_resp, s2_tlb_resp_valid)
65225a80bceSYanqin Li  val s3_tlb_update_index = RegEnable(s2_tlb_update_index, s2_tlb_resp_valid)
65325a80bceSYanqin Li  val s3_tlb_evict = RegNext(s2_tlb_evict)
65425a80bceSYanqin Li  val s3_pmp_resp = io.pmp_resp
  // A usable translation: valid response, entry not evicted, and a TLB hit.
65525a80bceSYanqin Li  val s3_update_valid = s3_tlb_resp_valid && !s3_tlb_evict && !s3_tlb_resp.miss
  // The entry is dropped when the translation faults or targets an uncacheable
  // location.
65625a80bceSYanqin Li  val s3_drop = s3_update_valid && (
65725a80bceSYanqin Li    // page/access fault
65825a80bceSYanqin Li    s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld ||
65925a80bceSYanqin Li    // uncache
66025a80bceSYanqin Li    s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head) ||
66125a80bceSYanqin Li    // pmp access fault
66225a80bceSYanqin Li    s3_pmp_resp.ld
66325a80bceSYanqin Li  )
  // Write the result back. Indices below MLP_L1_SIZE address l1_array; the rest
  // address l2_array after subtracting MLP_L1_SIZE.
66425a80bceSYanqin Li  when(s3_tlb_resp_valid && !s3_tlb_evict) {
66525a80bceSYanqin Li    when(s3_tlb_update_index < MLP_L1_SIZE.U) {
      // On a TLB miss the entry keeps is_vaddr set, so the s0 request logic can
      // issue the translation again; on a hit is_vaddr is cleared.
66625a80bceSYanqin Li      l1_array(s3_tlb_update_index).is_vaddr := s3_tlb_resp.miss
66725a80bceSYanqin Li
66825a80bceSYanqin Li      when(!s3_tlb_resp.miss) {
        // Replace the region field with the physical address bits from
        // REGION_TAG_OFFSET upwards, zero-padded by (VAddrBits - PAddrBits) bits.
66925a80bceSYanqin Li        l1_array(s3_tlb_update_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
67025a80bceSYanqin Li        when(s3_drop) {
          // NOTE(review): invalid_array is defined outside this view; the boolean
          // presumably selects the L2/L3 array (false = L1) - confirm.
67125a80bceSYanqin Li          invalid_array(s3_tlb_update_index, false)
67220e09ab1Shappy-lx        }
67320e09ab1Shappy-lx      }
67420e09ab1Shappy-lx    }.otherwise {
67525a80bceSYanqin Li      val inner_index = s3_tlb_update_index - MLP_L1_SIZE.U
67625a80bceSYanqin Li      l2_array(inner_index).is_vaddr := s3_tlb_resp.miss
67720e09ab1Shappy-lx
67825a80bceSYanqin Li      when(!s3_tlb_resp.miss) {
67925a80bceSYanqin Li        l2_array(inner_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
68025a80bceSYanqin Li        when(s3_drop) {
68170eea123SYanqin Li          invalid_array(inner_index, true)
68220e09ab1Shappy-lx        }
6830d32f713Shappy-lx      }
6840d32f713Shappy-lx    }
6850d32f713Shappy-lx  }
  // Responses are always accepted.
68625a80bceSYanqin Li  io.tlb_req.resp.ready := true.B
6870d32f713Shappy-lx
  // Perf counters: the fault and uncache counters below only count responses
  // that hit in the TLB and were not evicted (s3_update_valid).
68825a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_valid", s3_tlb_resp_valid)
68925a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_evict", s3_tlb_resp_valid && s3_tlb_evict)
69025a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_miss", s3_tlb_resp_valid && !s3_tlb_evict && s3_tlb_resp.miss)
69125a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_updated", s3_update_valid)
69225a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_page_fault", s3_update_valid && s3_tlb_resp.excp.head.pf.ld)
69325a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_guestpage_fault", s3_update_valid && s3_tlb_resp.excp.head.gpf.ld)
69425a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_access_fault", s3_update_valid && s3_tlb_resp.excp.head.af.ld)
69525a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_pmp_access_fault", s3_update_valid && s3_pmp_resp.ld)
69625a80bceSYanqin Li  XSPerfAccumulate("s3_tlb_resp_uncache", s3_update_valid && (Pbmt.isUncache(s3_tlb_resp.pbmt.head) || s3_pmp_resp.mmio))
6970d32f713Shappy-lx
6980d32f713Shappy-lx  // l1 pf
6990d32f713Shappy-lx  // s0: generate prefetch req paddr per entry, arb them
  // Each L1 entry owns one input of l1_pf_req_arb. The fire vector is kept for
  // the one-hot assertion below, together with a staged copy (s1_pf_fire_vec).
70020e09ab1Shappy-lx  val s0_pf_fire_vec = VecInit((0 until MLP_L1_SIZE).map{case i => l1_pf_req_arb.io.in(i).fire})
7014ccb2e8bSYanqin Li  val s1_pf_fire_vec = GatedValidRegNext(s0_pf_fire_vec)
7020d32f713Shappy-lx
7030d32f713Shappy-lx  val s0_pf_fire = l1_pf_req_arb.io.out.fire
704cd2ff98bShappy-lx  val s0_pf_index = l1_pf_req_arb.io.chosen
  // NOTE(review): get_candidate_oh is defined outside this view; presumably the
  // one-hot position, within the region bit vector, of the block at this paddr.
70520e09ab1Shappy-lx  val s0_pf_candidate_oh = get_candidate_oh(l1_pf_req_arb.io.out.bits.req.paddr)
7060d32f713Shappy-lx
70720e09ab1Shappy-lx  for(i <- 0 until MLP_L1_SIZE) {
    // An entry being allocated in s1 must not issue a prefetch in the same cycle.
70820e09ab1Shappy-lx    val evict = s1_l1_alloc && (s1_l1_index === i.U)
70970eea123SYanqin Li    l1_pf_req_arb.io.in(i).valid := l1_array(i).can_send_pf(l1_valids(i)) && !evict
71020e09ab1Shappy-lx    l1_pf_req_arb.io.in(i).bits.req.paddr := l1_array(i).get_pf_addr()
71120e09ab1Shappy-lx    l1_pf_req_arb.io.in(i).bits.req.alias := l1_array(i).alias
71220e09ab1Shappy-lx    l1_pf_req_arb.io.in(i).bits.req.confidence := io.confidence
71320e09ab1Shappy-lx    l1_pf_req_arb.io.in(i).bits.req.is_store := false.B
71420e09ab1Shappy-lx    l1_pf_req_arb.io.in(i).bits.req.pf_source := l1_array(i).source
71520e09ab1Shappy-lx    l1_pf_req_arb.io.in(i).bits.debug_vaddr := l1_array(i).get_pf_debug_vaddr()
7160d32f713Shappy-lx  }
7170d32f713Shappy-lx
  // Record the chosen candidate in the winning entry's sent_vec as soon as the
  // arbiter output fires.
7180d32f713Shappy-lx  when(s0_pf_fire) {
71920e09ab1Shappy-lx    l1_array(s0_pf_index).sent_vec := l1_array(s0_pf_index).sent_vec | s0_pf_candidate_oh
7200d32f713Shappy-lx  }
7210d32f713Shappy-lx
7220d32f713Shappy-lx  assert(PopCount(s0_pf_fire_vec) <= 1.U, "s0_pf_fire_vec should be one-hot or empty")
7230d32f713Shappy-lx
7240d32f713Shappy-lx  // s1: send out to dcache
  // NOTE(review): s1_pf_valid is declared with Reg(Bool()), i.e. without a reset
  // value, so it is undefined until first written - confirm this is intended
  // (RegInit(false.B) would give it a defined reset state).
7250d32f713Shappy-lx  val s1_pf_valid = Reg(Bool())
7260d32f713Shappy-lx  val s1_pf_bits = RegEnable(l1_pf_req_arb.io.out.bits, l1_pf_req_arb.io.out.fire)
7270d32f713Shappy-lx  val s1_pf_index = RegEnable(s0_pf_index, l1_pf_req_arb.io.out.fire)
7280d32f713Shappy-lx  val s1_pf_candidate_oh = RegEnable(s0_pf_candidate_oh, l1_pf_req_arb.io.out.fire)
  // Hazards against the enqueue pipeline: the pending request is held while its
  // entry is being allocated or updated in s1.
72920e09ab1Shappy-lx  val s1_pf_evict = s1_l1_alloc && (s1_l1_index === s1_pf_index)
73020e09ab1Shappy-lx  val s1_pf_update = s1_l1_update && (s1_l1_index === s1_pf_index)
7310d32f713Shappy-lx  val s1_pf_can_go = io.l1_req.ready && !s1_pf_evict && !s1_pf_update
7320d32f713Shappy-lx  val s1_pf_fire = s1_pf_valid && s1_pf_can_go
7330d32f713Shappy-lx
  // The clear is written first and the set second, so when a new request
  // arrives in the cycle the old one leaves, the set wins (last connect).
7340d32f713Shappy-lx  when(s1_pf_can_go) {
7350d32f713Shappy-lx    s1_pf_valid := false.B
7360d32f713Shappy-lx  }
7370d32f713Shappy-lx
7380d32f713Shappy-lx  when(l1_pf_req_arb.io.out.fire) {
7390d32f713Shappy-lx    s1_pf_valid := true.B
7400d32f713Shappy-lx  }
7410d32f713Shappy-lx
  // Once the request leaves s1, clear its bit in the entry's bit_vec.
7420d32f713Shappy-lx  when(s1_pf_fire) {
74320e09ab1Shappy-lx    l1_array(s1_pf_index).bit_vec := l1_array(s1_pf_index).bit_vec & ~s1_pf_candidate_oh
7440d32f713Shappy-lx  }
7450d32f713Shappy-lx
  // The outgoing valid is additionally gated by in_pmem and io.enable, whereas
  // s1_pf_fire is not: a request outside every PmemRanges range, or one issued
  // while io.enable is low, still leaves s1 and clears its bit without being
  // presented on io.l1_req.
7465bd65c56STang Haojin  val in_pmem = PmemRanges.map(_.cover(s1_pf_bits.req.paddr)).reduce(_ || _)
74745def856STang Haojin  io.l1_req.valid := s1_pf_valid && !s1_pf_evict && !s1_pf_update && in_pmem && io.enable
74820e09ab1Shappy-lx  io.l1_req.bits := s1_pf_bits.req
7490d32f713Shappy-lx
  // s0 may advance when s1 is empty or is draining this cycle.
7500d32f713Shappy-lx  l1_pf_req_arb.io.out.ready := s1_pf_can_go || !s1_pf_valid
7510d32f713Shappy-lx
  // Consistency check: s1_pf_can_go already excludes the evict/update cases, so
  // a fire must never coincide with an alloc or update of the same entry.
75220e09ab1Shappy-lx  assert(!((s1_l1_alloc || s1_l1_update) && s1_pf_fire && (s1_l1_index === s1_pf_index)), "pf pipeline & enq pipeline bit_vec harzard!")
7530d32f713Shappy-lx
7540d32f713Shappy-lx  XSPerfAccumulate("s1_pf_valid", s1_pf_valid)
7550d32f713Shappy-lx  XSPerfAccumulate("s1_pf_block_by_pipe_unready", s1_pf_valid && !io.l1_req.ready)
7560d32f713Shappy-lx  XSPerfAccumulate("s1_pf_block_by_enq_alloc_harzard", s1_pf_valid && s1_pf_evict)
7570d32f713Shappy-lx  XSPerfAccumulate("s1_pf_block_by_enq_update_harzard", s1_pf_valid && s1_pf_update)
7580d32f713Shappy-lx  XSPerfAccumulate("s1_pf_fire", s1_pf_fire)
7590d32f713Shappy-lx
7600d32f713Shappy-lx  // l2 pf
7610d32f713Shappy-lx  // s0: generate prefetch req paddr per entry, arb them, sent out
  // Unlike the L1 path there is no s1 stage here: the arbiter output drives
  // io.l2_pf_addr directly and is always ready.
7620d32f713Shappy-lx  io.l2_pf_addr.valid := l2_pf_req_arb.io.out.valid
76320e09ab1Shappy-lx  io.l2_pf_addr.bits := l2_pf_req_arb.io.out.bits.req
7640d32f713Shappy-lx
7650d32f713Shappy-lx  l2_pf_req_arb.io.out.ready := true.B
7660d32f713Shappy-lx
76720e09ab1Shappy-lx  for(i <- 0 until MLP_L2L3_SIZE) {
    // Only entries whose sink is SINK_L2 compete here; an entry being allocated
    // in s1 is masked out.
76820e09ab1Shappy-lx    val evict = s1_l2_alloc && (s1_l2_index === i.U)
76970eea123SYanqin Li    l2_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L2) && !evict
77020e09ab1Shappy-lx    l2_pf_req_arb.io.in(i).bits.req.addr := l2_array(i).get_pf_addr()
    // Translate the L1 prefetch source into the L2 request source; any source
    // other than stride or stream maps to Prefetch2L2Unknown.
77120e09ab1Shappy-lx    l2_pf_req_arb.io.in(i).bits.req.source := MuxLookup(l2_array(i).source.value, MemReqSource.Prefetch2L2Unknown.id.U)(Seq(
7720d32f713Shappy-lx      L1_HW_PREFETCH_STRIDE -> MemReqSource.Prefetch2L2Stride.id.U,
7730d32f713Shappy-lx      L1_HW_PREFETCH_STREAM -> MemReqSource.Prefetch2L2Stream.id.U
7740d32f713Shappy-lx    ))
77520e09ab1Shappy-lx    l2_pf_req_arb.io.in(i).bits.debug_vaddr := l2_array(i).get_pf_debug_vaddr()
7760d32f713Shappy-lx  }
7770d32f713Shappy-lx
  // Mark the issued block in the chosen entry's sent_vec.
7780d32f713Shappy-lx  when(l2_pf_req_arb.io.out.valid) {
77920e09ab1Shappy-lx    l2_array(l2_pf_req_arb.io.chosen).sent_vec := l2_array(l2_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l2_pf_req_arb.io.out.bits.req.addr)
7800d32f713Shappy-lx  }
7810d32f713Shappy-lx
78220e09ab1Shappy-lx  val stream_out_debug_table = ChiselDB.createTable("StreamPFTraceOut" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceOutEntry, basicDB = false)
  // Trace table for stream prefetches leaving the filter, one table per hart.
  // Two log calls share the table: one for the L1 arbiter output (tagged
  // SINK_L1) and one for the L2 arbiter output (tagged SINK_L2).
78320e09ab1Shappy-lx  val l1_debug_data = Wire(new StreamPFTraceOutEntry)
78420e09ab1Shappy-lx  val l2_debug_data = Wire(new StreamPFTraceOutEntry)
78520e09ab1Shappy-lx  l1_debug_data.PFVaddr := l1_pf_req_arb.io.out.bits.debug_vaddr
78620e09ab1Shappy-lx  l1_debug_data.PFSink := SINK_L1
78720e09ab1Shappy-lx  l2_debug_data.PFVaddr := l2_pf_req_arb.io.out.bits.debug_vaddr
78820e09ab1Shappy-lx  l2_debug_data.PFSink := SINK_L2
78920e09ab1Shappy-lx
  // Only stream-sourced requests are logged. The L1 side checks the L1 prefetch
  // source, the L2 side checks the already translated MemReqSource value.
79020e09ab1Shappy-lx  stream_out_debug_table.log(
79120e09ab1Shappy-lx    data = l1_debug_data,
79220e09ab1Shappy-lx    en = l1_pf_req_arb.io.out.fire && (l1_pf_req_arb.io.out.bits.req.pf_source.value === L1_HW_PREFETCH_STREAM),
79320e09ab1Shappy-lx    site = "StreamPFTraceOut",
79420e09ab1Shappy-lx    clock = clock,
79520e09ab1Shappy-lx    reset = reset
79620e09ab1Shappy-lx  )
79720e09ab1Shappy-lx  stream_out_debug_table.log(
79820e09ab1Shappy-lx    data = l2_debug_data,
79920e09ab1Shappy-lx    en = l2_pf_req_arb.io.out.fire && (l2_pf_req_arb.io.out.bits.req.source === MemReqSource.Prefetch2L2Stream.id.U),
80020e09ab1Shappy-lx    site = "StreamPFTraceOut",
80120e09ab1Shappy-lx    clock = clock,
80220e09ab1Shappy-lx    reset = reset
80320e09ab1Shappy-lx  )
80420e09ab1Shappy-lx
8050d32f713Shappy-lx  // last level cache pf
8060d32f713Shappy-lx  // s0: generate prefetch req paddr per entry, arb them, sent out
  // Same single-stage scheme as the L2 path: the arbiter output drives
  // io.l3_pf_addr directly and is always ready. The payload is just the address.
8070d32f713Shappy-lx  io.l3_pf_addr.valid := l3_pf_req_arb.io.out.valid
8080d32f713Shappy-lx  io.l3_pf_addr.bits := l3_pf_req_arb.io.out.bits
8090d32f713Shappy-lx
8100d32f713Shappy-lx  l3_pf_req_arb.io.out.ready := true.B
8110d32f713Shappy-lx
81220e09ab1Shappy-lx  for(i <- 0 until MLP_L2L3_SIZE) {
    // L3 requests come from the same l2_array entries, selected by
    // sink === SINK_L3; an entry being allocated in s1 is masked out.
81320e09ab1Shappy-lx    val evict = s1_l2_alloc && (s1_l2_index === i.U)
81470eea123SYanqin Li    l3_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L3) && !evict
81520e09ab1Shappy-lx    l3_pf_req_arb.io.in(i).bits := l2_array(i).get_pf_addr()
8160d32f713Shappy-lx  }
8170d32f713Shappy-lx
  // Mark the issued block in the chosen entry's sent_vec.
8180d32f713Shappy-lx  when(l3_pf_req_arb.io.out.valid) {
81920e09ab1Shappy-lx    l2_array(l3_pf_req_arb.io.chosen).sent_vec := l2_array(l3_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l3_pf_req_arb.io.out.bits)
8200d32f713Shappy-lx  }
8210d32f713Shappy-lx
8220d32f713Shappy-lx  // reset meta to avoid multi-hit problem
  // One cycle after io.flush is asserted, reset_array is called for every entry
  // of both arrays. Because this block is written after the update logic above,
  // any register written by both takes the value assigned here (last connect).
  // NOTE(review): reset_array is defined outside this view; the boolean
  // presumably selects the L2/L3 array (false = L1) - confirm.
8230d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
82420e09ab1Shappy-lx    if(i < MLP_L1_SIZE) {
82570eea123SYanqin Li      when(RegNext(io.flush)) {
82670eea123SYanqin Li        reset_array(i, false)
82720e09ab1Shappy-lx      }
82820e09ab1Shappy-lx    }else {
82970eea123SYanqin Li      when(RegNext(io.flush)) {
83070eea123SYanqin Li        reset_array(i - MLP_L1_SIZE, true)
83120e09ab1Shappy-lx      }
8320d32f713Shappy-lx    }
8330d32f713Shappy-lx  }
8340d32f713Shappy-lx
8350d32f713Shappy-lx  XSPerfAccumulate("l2_prefetche_queue_busby", io.l2PfqBusy)
  // Occupancy histograms: the number of entries for which can_send_pf holds,
  // sampled every cycle - overall, for the L1 array only, and for the L2/L3
  // array split by sink (SINK_L2 / SINK_L3).
83670eea123SYanqin Li  XSPerfHistogram("filter_active", PopCount(VecInit(
83770eea123SYanqin Li    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
83870eea123SYanqin Li    l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) }
83970eea123SYanqin Li    ).asUInt), true.B, 0, MLP_SIZE, 1)
84070eea123SYanqin Li  XSPerfHistogram("l1_filter_active", PopCount(VecInit(l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v)}).asUInt), true.B, 0, MLP_L1_SIZE, 1)
84170eea123SYanqin Li  XSPerfHistogram("l2_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L2)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
84270eea123SYanqin Li  XSPerfHistogram("l3_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L3)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
8430d32f713Shappy-lx}
8440d32f713Shappy-lx
8450d32f713Shappy-lxclass L1Prefetcher(implicit p: Parameters) extends BasePrefecher with HasStreamPrefetchHelper with HasStridePrefetchHelper {
  // Top level of the L1 prefetcher. It wires together:
  //   - a stream path: stream_train_filter -> stream_bit_vec_array
  //   - a stride path: stride_train_filter -> stride_meta_array
  //   - a shared MutiLevelPrefetchFilter that receives the prefetch requests of
  //     both paths and drives io.l1_req, io.l2_req, io.l3_req and io.tlb_req.
  //
  // Extra ports beyond the base prefetcher io:
  //   pf_ctrl      - control bundle (enable, flush, dynamic_depth, confidence)
  //   stride_train - training inputs for the stride path, LduCnt + HyuCnt ports
  //   l2PfqBusy    - forwarded unchanged to the filter
8460d32f713Shappy-lx  val pf_ctrl = IO(Input(new PrefetchControlBundle))
847*99ce5576Scz4e  val stride_train = IO(Flipped(Vec(backendParams.LduCnt + backendParams.HyuCnt, ValidIO(new LsPrefetchTrainBundle()))))
8480d32f713Shappy-lx  val l2PfqBusy = IO(Input(Bool()))
8490d32f713Shappy-lx
8500d32f713Shappy-lx  val stride_train_filter = Module(new TrainFilter(STRIDE_FILTER_SIZE, "stride"))
8510d32f713Shappy-lx  val stride_meta_array = Module(new StrideMetaArray)
8520d32f713Shappy-lx  val stream_train_filter = Module(new TrainFilter(STREAM_FILTER_SIZE, "stream"))
8530d32f713Shappy-lx  val stream_bit_vec_array = Module(new StreamBitVectorArray)
8540d32f713Shappy-lx  val pf_queue_filter = Module(new MutiLevelPrefetchFilter)
8550d32f713Shappy-lx
8560d32f713Shappy-lx  // for now, if the stream is disabled, train and prefetch process will continue, without sending out any reqs
8570d32f713Shappy-lx  val enable = io.enable
8580d32f713Shappy-lx  val flush = pf_ctrl.flush
8590d32f713Shappy-lx
  // Stream training takes the base io.ld_in ports, gated by enable.
8600d32f713Shappy-lx  stream_train_filter.io.ld_in.zipWithIndex.foreach {
8610d32f713Shappy-lx    case (ld_in, i) => {
8620d32f713Shappy-lx      ld_in.valid := io.ld_in(i).valid && enable
8630d32f713Shappy-lx      ld_in.bits := io.ld_in(i).bits
8640d32f713Shappy-lx    }
8650d32f713Shappy-lx  }
8660d32f713Shappy-lx  stream_train_filter.io.enable := enable
8670d32f713Shappy-lx  stream_train_filter.io.flush := flush
8680d32f713Shappy-lx
  // Stride training takes the dedicated stride_train ports, gated by enable.
8690d32f713Shappy-lx  stride_train_filter.io.ld_in.zipWithIndex.foreach {
8700d32f713Shappy-lx    case (ld_in, i) => {
8710d32f713Shappy-lx      ld_in.valid := stride_train(i).valid && enable
8720d32f713Shappy-lx      ld_in.bits := stride_train(i).bits
8730d32f713Shappy-lx    }
8740d32f713Shappy-lx  }
8750d32f713Shappy-lx  stride_train_filter.io.enable := enable
8760d32f713Shappy-lx  stride_train_filter.io.flush := flush
8770d32f713Shappy-lx
  // The stream array uses the dynamic depth from pf_ctrl.
8780d32f713Shappy-lx  stream_bit_vec_array.io.enable := enable
8790d32f713Shappy-lx  stream_bit_vec_array.io.flush := flush
8800d32f713Shappy-lx  stream_bit_vec_array.io.dynamic_depth := pf_ctrl.dynamic_depth
8810d32f713Shappy-lx  stream_bit_vec_array.io.train_req <> stream_train_filter.io.train_req
8820d32f713Shappy-lx
  // The stride array has its dynamic depth tied to 0 and is connected to the
  // stream array through the stream_lookup_req / stream_lookup_resp ports.
8830d32f713Shappy-lx  stride_meta_array.io.enable := enable
8840d32f713Shappy-lx  stride_meta_array.io.flush := flush
8850d32f713Shappy-lx  stride_meta_array.io.dynamic_depth := 0.U
8860d32f713Shappy-lx  stride_meta_array.io.train_req <> stride_train_filter.io.train_req
8870d32f713Shappy-lx  stride_meta_array.io.stream_lookup_req <> stream_bit_vec_array.io.stream_lookup_req
8880d32f713Shappy-lx  stride_meta_array.io.stream_lookup_resp <> stream_bit_vec_array.io.stream_lookup_resp
8890d32f713Shappy-lx
8900d32f713Shappy-lx  // stream has higher priority than stride
  // When both paths present a request in the same cycle the stream payload is
  // selected; the stride request of that cycle is not forwarded.
89120e09ab1Shappy-lx  pf_queue_filter.io.l1_prefetch_req.valid := stream_bit_vec_array.io.l1_prefetch_req.valid || stride_meta_array.io.l1_prefetch_req.valid
89220e09ab1Shappy-lx  pf_queue_filter.io.l1_prefetch_req.bits := Mux(
89320e09ab1Shappy-lx    stream_bit_vec_array.io.l1_prefetch_req.valid,
89420e09ab1Shappy-lx    stream_bit_vec_array.io.l1_prefetch_req.bits,
89520e09ab1Shappy-lx    stride_meta_array.io.l1_prefetch_req.bits
89620e09ab1Shappy-lx  )
89720e09ab1Shappy-lx
89820e09ab1Shappy-lx  pf_queue_filter.io.l2_l3_prefetch_req.valid := stream_bit_vec_array.io.l2_l3_prefetch_req.valid || stride_meta_array.io.l2_l3_prefetch_req.valid
89920e09ab1Shappy-lx  pf_queue_filter.io.l2_l3_prefetch_req.bits := Mux(
90020e09ab1Shappy-lx    stream_bit_vec_array.io.l2_l3_prefetch_req.valid,
90120e09ab1Shappy-lx    stream_bit_vec_array.io.l2_l3_prefetch_req.bits,
90220e09ab1Shappy-lx    stride_meta_array.io.l2_l3_prefetch_req.bits
9030d32f713Shappy-lx  )
9040d32f713Shappy-lx
  // Outgoing L1 requests are gated by both enable and pf_ctrl.enable.
9050d32f713Shappy-lx  io.l1_req.valid := pf_queue_filter.io.l1_req.valid && enable && pf_ctrl.enable
9060d32f713Shappy-lx  io.l1_req.bits := pf_queue_filter.io.l1_req.bits
9070d32f713Shappy-lx
  // While pf_ctrl.enable is low the filter sees ready = true, so its pending
  // L1 requests keep draining even though none are sent out.
9080d32f713Shappy-lx  pf_queue_filter.io.l1_req.ready := Mux(pf_ctrl.enable, io.l1_req.ready, true.B)
9090d32f713Shappy-lx  pf_queue_filter.io.tlb_req <> io.tlb_req
91025a80bceSYanqin Li  pf_queue_filter.io.pmp_resp := io.pmp_resp
9110d32f713Shappy-lx  pf_queue_filter.io.enable := enable
9120d32f713Shappy-lx  pf_queue_filter.io.flush := flush
9130d32f713Shappy-lx  pf_queue_filter.io.confidence := pf_ctrl.confidence
9140d32f713Shappy-lx  pf_queue_filter.io.l2PfqBusy := l2PfqBusy
9150d32f713Shappy-lx
  // L2 and L3 requests are only presented when the address is covered by one
  // of the PmemRanges and both enables are set.
9165bd65c56STang Haojin  val l2_in_pmem = PmemRanges.map(_.cover(pf_queue_filter.io.l2_pf_addr.bits.addr)).reduce(_ || _)
91745def856STang Haojin  io.l2_req.valid := pf_queue_filter.io.l2_pf_addr.valid && l2_in_pmem && enable && pf_ctrl.enable
9180d32f713Shappy-lx  io.l2_req.bits := pf_queue_filter.io.l2_pf_addr.bits
9190d32f713Shappy-lx
9205bd65c56STang Haojin  val l3_in_pmem = PmemRanges.map(_.cover(pf_queue_filter.io.l3_pf_addr.bits)).reduce(_ || _)
92145def856STang Haojin  io.l3_req.valid := pf_queue_filter.io.l3_pf_addr.valid && l3_in_pmem && enable && pf_ctrl.enable
9220d32f713Shappy-lx  io.l3_req.bits := pf_queue_filter.io.l3_pf_addr.bits
9230d32f713Shappy-lx}
924