// Source: XiangShan/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala (revision 8891a219bbc84f568e1d134854d8d5ed86d6d560)
10d32f713Shappy-lxpackage xiangshan.mem.prefetch
20d32f713Shappy-lx
3*8891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
40d32f713Shappy-lximport chisel3._
50d32f713Shappy-lximport chisel3.util._
60d32f713Shappy-lximport xiangshan._
70d32f713Shappy-lximport utils._
80d32f713Shappy-lximport utility._
90d32f713Shappy-lximport xiangshan.cache.HasDCacheParameters
100d32f713Shappy-lximport xiangshan.cache.mmu._
110d32f713Shappy-lximport xiangshan.mem.{L1PrefetchReq, LdPrefetchTrainBundle}
120d32f713Shappy-lximport xiangshan.mem.trace._
130d32f713Shappy-lximport xiangshan.mem.L1PrefetchSource
140d32f713Shappy-lx
/** Constants and bit-slicing helpers shared by the L1 prefetch components.
  *
  * Virtual addresses are viewed as:
  * {{{
  * |       region tag        |  region bits  | block offset |
  * }}}
  */
trait HasL1PrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // region related
  val REGION_SIZE = 1024 // bytes covered by one region
  val PAGE_OFFSET = 12
  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val BIT_VEC_WITDH = REGION_SIZE / dcacheParameters.blockBytes // cache blocks per region
  val REGION_BITS = log2Up(BIT_VEC_WITDH)
  val REGION_TAG_OFFSET = BLOCK_OFFSET + REGION_BITS
  val REGION_TAG_BITS = VAddrBits - BLOCK_OFFSET - REGION_BITS

  // hash related
  val VADDR_HASH_WIDTH = 5
  val BLK_ADDR_RAW_WIDTH = 10
  val HASH_TAG_WIDTH = VADDR_HASH_WIDTH + BLK_ADDR_RAW_WIDTH

  // capacity related
  val MLP_SIZE = 16

  // prefetch sink related
  val SINK_BITS = 2
  def SINK_L1 = "b00".U
  def SINK_L2 = "b01".U
  def SINK_L3 = "b10".U

  /** Region tag of `vaddr`: every bit above the region bits.
    * `vaddr` must be exactly `VAddrBits` wide.
    */
  def get_region_tag(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(vaddr.getWidth - 1, REGION_TAG_OFFSET)
  }

  /** Index of the cache block inside its region.
    * `vaddr` must be exactly `VAddrBits` wide.
    */
  def get_region_bits(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(REGION_TAG_OFFSET - 1, BLOCK_OFFSET)
  }

  /** Drops the block-offset bits of `x`. */
  def block_addr(x: UInt): UInt = {
    x(x.getWidth - 1, BLOCK_OFFSET)
  }

  /** XOR-folds the low `3 * VADDR_HASH_WIDTH` bits of `x` into `VADDR_HASH_WIDTH` bits. */
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  // Common body of the *_hash_tag helpers below: the low BLK_ADDR_RAW_WIDTH bits are kept
  // verbatim and the next 3 * VADDR_HASH_WIDTH bits are folded by vaddr_hash and prepended.
  // No width check is done here on purpose: callers also pass narrow literals.
  private def raw_low_folded_high_tag(x: UInt): UInt = {
    val low = x(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = x(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** `HASH_TAG_WIDTH`-bit tag computed directly from the bits of `x`. */
  def pc_hash_tag(x: UInt): UInt = raw_low_folded_high_tag(x)

  /** `HASH_TAG_WIDTH`-bit tag computed from the block address of `x` (block offset removed first). */
  def block_hash_tag(x: UInt): UInt = raw_low_folded_high_tag(block_addr(x))

  /** `HASH_TAG_WIDTH`-bit tag computed from a region tag. */
  def region_hash_tag(region_tag: UInt): UInt = raw_low_folded_high_tag(region_tag)

  /** Rebuilds a block address from a region tag and the block index inside that region. */
  def region_to_block_addr(region_tag: UInt, region_bits: UInt): UInt = {
    Cat(region_tag, region_bits)
  }

  /** One-hot encoding of the block index (inside its region) selected by `x`.
    * `x` must be exactly `PAddrBits` wide.
    */
  def get_candidate_oh(x: UInt): UInt = {
    require(x.getWidth == PAddrBits)
    UIntToOH(x(REGION_BITS + BLOCK_OFFSET - 1, BLOCK_OFFSET))
  }

  /** Binary digits of `n` without leading zeros ("0" for 0).
    * Negative values are rejected; they used to yield malformed strings.
    */
  def toBinary(n: Int): String = {
    require(n >= 0, s"toBinary expects a non-negative value, got $n")
    n match {
      case 0 | 1 => s"$n"
      case _     => s"${toBinary(n / 2)}${n % 2}"
    }
  }
}
980d32f713Shappy-lx
/** Helper that orders same-cycle load training requests by age. */
trait HasTrainFilterHelper extends HasCircularQueuePtrHelper {
  /** Returns the elements of `source` ordered oldest-first by `uop.robIdx`.
    *
    * Only the ROB index is compared; the `valid` bit simply travels with its entry.
    * Vectors of one, two or three elements are supported; any other length fails elaboration.
    *
    * @param source same-cycle training requests, one per load pipeline
    * @return a vector of the same type whose index 0 holds the oldest request
    */
  def reorder[T <: LdPrefetchTrainBundle](source: Vec[ValidIO[T]]): Vec[ValidIO[T]] = {
    if(source.length == 1) {
      source
    }else if(source.length == 2) {
      val res = Wire(source.cloneType)
      // source 1 is older than source 0
      val source_1_older = isBefore(source(1).bits.uop.robIdx, source(0).bits.uop.robIdx)
      when(source_1_older) {
        res(0) := source(1)
        res(1) := source(0)
      }.otherwise {
        res := source
      }

      res
    }else if(source.length == 3) {
      // TODO: generalize
      // Three compare-and-swap steps on the pairs (0,1), (1,2), (0,1).
      val res_0_1 = Wire(source.cloneType)
      val res_1_2 = Wire(source.cloneType)
      val res = Wire(source.cloneType)

      val tmp = reorder(VecInit(source.slice(0, 2)))
      res_0_1(0) := tmp(0)
      res_0_1(1) := tmp(1)
      res_0_1(2) := source(2)
      val tmp_1 = reorder(VecInit(res_0_1.slice(1, 3)))
      res_1_2(0) := res_0_1(0)
      res_1_2(1) := tmp_1(0)
      res_1_2(2) := tmp_1(1)
      val tmp_2 = reorder(VecInit(res_1_2.slice(0, 2)))
      res(0) := tmp_2(0)
      res(1) := tmp_2(1)
      res(2) := res_1_2(2)

      res
    }else {
      require(false, "for now, 4 or more sources are invalid")
      source
    }
  }
}
1420d32f713Shappy-lx
// get prefetch train reqs from `exuParameters.LduCnt` load pipelines (up to `exuParameters.LduCnt`/cycle)
// filter by cache line address, send out train req to stride (up to 1 req/cycle)
/** Queue that de-duplicates load training requests and hands them out one per cycle.
  *
  * Incoming requests are ordered oldest-first, dropped when their block hash tag matches a
  * valid queued entry or an earlier request of the same cycle, and otherwise written at the
  * enqueue pointers. The entry at the dequeue pointer is offered on `io.train_req`.
  *
  * @param size number of queue entries; must be at least `exuParameters.LduCnt`
  * @param name prefix used in the performance-counter names
  */
class TrainFilter(size: Int, name: String)(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    val enable = Input(Bool())
    val flush = Input(Bool())
    // train input, only from load for now
    val ld_in = Flipped(Vec(exuParameters.LduCnt, ValidIO(new LdPrefetchTrainBundle())))
    // filter out
    val train_req = DecoupledIO(new PrefetchReqBundle())
  })

  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => size ){}
  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val entries = RegInit(VecInit(Seq.fill(size){ (0.U.asTypeOf(new PrefetchReqBundle())) }))
  val valids = RegInit(VecInit(Seq.fill(size){ (false.B) }))

  // enq
  // enqPtrExt(k) starts at k, so it addresses the k-th slot to be written in a cycle;
  // all enqueue pointers advance together by the number of allocations.
  val enqLen = exuParameters.LduCnt
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(size >= enqLen)

  val ld_in_reordered = reorder(io.ld_in)
  val reqs_l = ld_in_reordered.map(_.bits.asPrefetchReqBundle())
  val reqs_vl = ld_in_reordered.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_l(i)
    val req_v = reqs_vl(i)
    // slot offset = number of older same-cycle requests that also need an entry
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    // already queued?
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    // same block as an older request arriving in this cycle?
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_l.zip(reqs_vl).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    // NOTE(review): `allocPtr >= deqPtrExt` looks intended as a "queue not full" guard;
    // confirm against CircularQueuePtr's comparison semantics that it can actually be false.
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt && io.enable

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  enqPtrExt.foreach{case x => x := x + allocNum}

  // deq
  // Defaults first; the entry selected by deqPtr overrides them.
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid && io.enable
        io.train_req.bits := entry
      }
    }
  }

  when(io.train_req.fire) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  // Flush takes effect one cycle after io.flush and, being the last connection,
  // overrides the enqueue/dequeue updates above.
  when(RegNext(io.flush)) {
    valids.foreach {case valid => valid := false.B}
    (0 until enqLen).foreach {case i => enqPtrExt(i) := i.U.asTypeOf(new Ptr)}
    deqPtrExt := 0.U.asTypeOf(new Ptr)
  }

  XSPerfAccumulate(s"${name}_train_filter_full", PopCount(valids) === size.U)
  XSPerfAccumulate(s"${name}_train_filter_half", PopCount(valids) >= (size / 2).U)
  XSPerfAccumulate(s"${name}_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vl)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate(s"${name}_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate(s"${name}_train_filter_deq", io.train_req.fire)
  // one counter per possible valid-bit pattern of the enqueue ports
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"${name}_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"${name}_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"${name}_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}
2450d32f713Shappy-lx
/** One entry of the multi-level prefetch filter: a region plus the blocks still to prefetch.
  *
  * `region` holds a virtual region tag while `is_vaddr` is set and is overwritten with the
  * translated tag once a TLB response arrives (done by the owning module).
  */
class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1PrefetchHelper {
  val tag = UInt(HASH_TAG_WIDTH.W)
  val region = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  // NOTE: l1 will not use sent_vec, for making more prefetch reqs to l1 dcache
  val sent_vec = UInt(BIT_VEC_WITDH.W)
  val sink = UInt(SINK_BITS.W)
  val alias = UInt(2.W)
  val is_vaddr = Bool()
  val source = new L1PrefetchSource()

  /** Puts the entry into its idle state. Region and tag are derived from `index`, so every
    * entry of an array reset this way carries a different tag.
    */
  def reset(index: Int) = {
    tag := region_hash_tag(index.U)
    region := index.U
    bit_vec := 0.U
    sent_vec := 0.U
    sink := SINK_L1
    alias := 0.U
    is_vaddr := false.B
    source.value := L1_HW_PREFETCH_NULL
  }

  /** True when the stored hash tag equals `new_tag` (which must be `HASH_TAG_WIDTH` wide). */
  def tag_match(new_tag: UInt): Bool = {
    require(new_tag.getWidth == HASH_TAG_WIDTH)
    tag === new_tag
  }

  /** Merges a new request for the same region into this entry.
    *
    * The new bits are OR-ed into `bit_vec`. When the new sink encoding is smaller than the
    * stored one, the sink is replaced and the already-sent bits are dropped from `bit_vec`.
    */
  def update(update_bit_vec: UInt, update_sink: UInt) = {
    bit_vec := bit_vec | update_bit_vec
    when(update_sink < sink) {
      bit_vec := (bit_vec & ~sent_vec) | update_bit_vec
      sink := update_sink
    }

    assert(PopCount(update_bit_vec) >= 1.U, "update vector should have at least one valid bit")
  }

  /** True when the entry holds a translated region with something left to prefetch.
    * For the L1 sink every set bit of `bit_vec` counts; for other sinks only unsent bits do.
    */
  def can_send_pf(): Bool = {
    Mux(
      sink === SINK_L1,
      !is_vaddr && bit_vec.orR,
      !is_vaddr && (bit_vec & ~sent_vec).orR
    )
  }

  /** Block-aligned address of the lowest-index pending block of this region. */
  def get_pf_addr(): UInt = {
    require(PAddrBits <= VAddrBits)
    require((region.getWidth + REGION_BITS + BLOCK_OFFSET) == VAddrBits)

    val candidate = Mux(
      sink === SINK_L1,
      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
    )
    Cat(region, candidate, 0.U(BLOCK_OFFSET.W))
  }

  /** Region base address (region tag followed by zeros) used for the TLB lookup. */
  def get_tlb_va(): UInt = {
    require((region.getWidth + REGION_TAG_OFFSET) == VAddrBits)
    Cat(region, 0.U(REGION_TAG_OFFSET.W))
  }

  /** Builds a fresh, not-yet-translated entry from a stream prefetch request.
    * Does not read or modify the state of the receiver.
    */
  def fromStreamPrefetchReqBundle(x : StreamPrefetchReqBundle): MLPReqFilterBundle = {
    require(PAGE_OFFSET >= REGION_TAG_OFFSET, "region is greater than 4k, alias bit may be incorrect")

    val res = Wire(new MLPReqFilterBundle)
    res.tag := region_hash_tag(x.region)
    res.region := x.region
    res.bit_vec := x.bit_vec
    res.sent_vec := 0.U
    res.sink := x.sink
    res.is_vaddr := true.B
    res.source := x.source
    // the two region-tag bits that correspond to address bits PAGE_OFFSET+1 .. PAGE_OFFSET
    res.alias := x.region(PAGE_OFFSET - REGION_TAG_OFFSET + 1, PAGE_OFFSET - REGION_TAG_OFFSET)

    res
  }

  /** Stops the entry from issuing any further prefetch or TLB request. */
  def invalidate() = {
    // disable sending pf req
    when(sink === SINK_L1) {
      bit_vec := 0.U(BIT_VEC_WITDH.W)
    }.otherwise {
      sent_vec := ~(0.U(BIT_VEC_WITDH.W))
    }
    // disable sending tlb req
    is_vaddr := false.B
  }
}
3350d32f713Shappy-lx
3360d32f713Shappy-lx// there are 5 independent pipelines inside
3370d32f713Shappy-lx// 1. prefetch enqueue
3380d32f713Shappy-lx// 2. tlb request
3390d32f713Shappy-lx// 3. actual l1 prefetch
3400d32f713Shappy-lx// 4. actual l2 prefetch
3410d32f713Shappy-lx// 5. actual l3 prefetch
3420d32f713Shappy-lxclass MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper {
3430d32f713Shappy-lx  val io = IO(new XSBundle {
3440d32f713Shappy-lx    val enable = Input(Bool())
3450d32f713Shappy-lx    val flush = Input(Bool())
3460d32f713Shappy-lx    val prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
3470d32f713Shappy-lx    val tlb_req = new TlbRequestIO(nRespDups = 2)
3480d32f713Shappy-lx    val l1_req = DecoupledIO(new L1PrefetchReq())
3490d32f713Shappy-lx    val l2_pf_addr = ValidIO(new L2PrefetchReq())
3500d32f713Shappy-lx    val l3_pf_addr = ValidIO(UInt(PAddrBits.W)) // TODO: l3 pf source
3510d32f713Shappy-lx    val confidence = Input(UInt(1.W))
3520d32f713Shappy-lx    val l2PfqBusy = Input(Bool())
3530d32f713Shappy-lx  })
3540d32f713Shappy-lx
3550d32f713Shappy-lx  val array = Reg(Vec(MLP_SIZE, new MLPReqFilterBundle))
3560d32f713Shappy-lx  val replacement = ReplacementPolicy.fromString("plru", MLP_SIZE)
3570d32f713Shappy-lx  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, MLP_SIZE))
3580d32f713Shappy-lx  val l1_pf_req_arb = Module(new RRArbiterInit(new L1PrefetchReq, MLP_SIZE))
3590d32f713Shappy-lx  val l2_pf_req_arb = Module(new RRArbiterInit(new L2PrefetchReq, MLP_SIZE))
3600d32f713Shappy-lx  val l3_pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), MLP_SIZE))
3610d32f713Shappy-lx
3620d32f713Shappy-lx  // enq
3630d32f713Shappy-lx  // s0: hash tag match
3640d32f713Shappy-lx  val s0_can_accept = Wire(Bool())
3650d32f713Shappy-lx  val s0_valid = io.prefetch_req.valid && s0_can_accept
3660d32f713Shappy-lx  val s0_region = io.prefetch_req.bits.region
3670d32f713Shappy-lx  val s0_region_hash = region_hash_tag(s0_region)
3680d32f713Shappy-lx  val s0_match_vec = array.map(_.tag_match(s0_region_hash))
3690d32f713Shappy-lx  val s0_hit = VecInit(s0_match_vec).asUInt.orR
3700d32f713Shappy-lx  val s0_index = Mux(s0_hit, OHToUInt(VecInit(s0_match_vec).asUInt), replacement.way)
3710d32f713Shappy-lx  val s0_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.prefetch_req.bits)
3720d32f713Shappy-lx
3730d32f713Shappy-lx  when(s0_valid) {
3740d32f713Shappy-lx    replacement.access(s0_index)
3750d32f713Shappy-lx  }
3760d32f713Shappy-lx
3770d32f713Shappy-lx  assert(!s0_valid || PopCount(VecInit(s0_match_vec)) <= 1.U, "req region should match no more than 1 entry")
3780d32f713Shappy-lx  assert(!(s0_valid && RegNext(s0_valid) && !s0_hit && !RegNext(s0_hit) && replacement.way === RegNext(replacement.way)), "replacement error")
3790d32f713Shappy-lx
3800d32f713Shappy-lx  XSPerfAccumulate("s0_enq_fire", s0_valid)
3810d32f713Shappy-lx  XSPerfAccumulate("s0_enq_valid", io.prefetch_req.valid)
3820d32f713Shappy-lx  XSPerfAccumulate("s0_cannot_enq", io.prefetch_req.valid && !s0_can_accept)
3830d32f713Shappy-lx
3840d32f713Shappy-lx  // s1: alloc or update
3850d32f713Shappy-lx  val s1_valid = RegNext(s0_valid)
3860d32f713Shappy-lx  val s1_region = RegEnable(s0_region, s0_valid)
3870d32f713Shappy-lx  val s1_region_hash = RegEnable(s0_region_hash, s0_valid)
3880d32f713Shappy-lx  val s1_hit = RegEnable(s0_hit, s0_valid)
3890d32f713Shappy-lx  val s1_index = RegEnable(s0_index, s0_valid)
3900d32f713Shappy-lx  val s1_prefetch_req = RegEnable(s0_prefetch_req, s0_valid)
3910d32f713Shappy-lx  val s1_alloc = s1_valid && !s1_hit
3920d32f713Shappy-lx  val s1_update = s1_valid && s1_hit
3930d32f713Shappy-lx  s0_can_accept := !(s1_valid && s1_alloc && (s0_region_hash === s1_region_hash))
3940d32f713Shappy-lx
3950d32f713Shappy-lx  when(s1_alloc) {
3960d32f713Shappy-lx    array(s1_index) := s1_prefetch_req
3970d32f713Shappy-lx  }.elsewhen(s1_update) {
3980d32f713Shappy-lx    array(s1_index).update(
3990d32f713Shappy-lx      update_bit_vec = s1_prefetch_req.bit_vec,
4000d32f713Shappy-lx      update_sink = s1_prefetch_req.sink
4010d32f713Shappy-lx    )
4020d32f713Shappy-lx  }
4030d32f713Shappy-lx
4040d32f713Shappy-lx  // TODO: set this constraint looser to enable more kinds of depth
4050d32f713Shappy-lx  // assert(!(s0_valid && s1_valid && s0_region === s1_region), "s0 and s1 must have different region")
4060d32f713Shappy-lx
4070d32f713Shappy-lx  XSPerfAccumulate("s1_enq_valid", s1_valid)
4080d32f713Shappy-lx  XSPerfAccumulate("s1_enq_alloc", s1_alloc)
4090d32f713Shappy-lx  XSPerfAccumulate("s1_enq_update", s1_update)
4100d32f713Shappy-lx  XSPerfAccumulate("hash_conflict", s0_valid && RegNext(s1_valid) && (s0_region =/= RegNext(s1_region)) && (s0_region_hash === RegNext(s1_region_hash)))
4110d32f713Shappy-lx
4120d32f713Shappy-lx  // tlb req
4130d32f713Shappy-lx  // s0: arb all tlb reqs
4140d32f713Shappy-lx  val s0_tlb_fire_vec = VecInit((0 until MLP_SIZE).map{case i => tlb_req_arb.io.in(i).fire})
4150d32f713Shappy-lx  val s1_tlb_fire_vec = RegNext(s0_tlb_fire_vec)
4160d32f713Shappy-lx  val s2_tlb_fire_vec = RegNext(s1_tlb_fire_vec)
4170d32f713Shappy-lx
4180d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
4190d32f713Shappy-lx    val evict = s1_alloc && (s1_index === i.U)
4200d32f713Shappy-lx    tlb_req_arb.io.in(i).valid := array(i).is_vaddr && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !evict
4210d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.vaddr := array(i).get_tlb_va()
4220d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
4230d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.size := 3.U
4240d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.kill := false.B
4250d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.no_translate := false.B
4260d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.memidx := DontCare
4270d32f713Shappy-lx    tlb_req_arb.io.in(i).bits.debug := DontCare
4280d32f713Shappy-lx  }
4290d32f713Shappy-lx
4300d32f713Shappy-lx  assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")
4310d32f713Shappy-lx
4320d32f713Shappy-lx  // s1: send out the req
4330d32f713Shappy-lx  val s1_tlb_req_valid = RegNext(tlb_req_arb.io.out.valid)
4340d32f713Shappy-lx  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid)
4350d32f713Shappy-lx  val s1_tlb_req_index = RegEnable(OHToUInt(s0_tlb_fire_vec.asUInt), tlb_req_arb.io.out.valid)
4360d32f713Shappy-lx  val s1_tlb_evict = s1_alloc && (s1_index === s1_tlb_req_index)
4370d32f713Shappy-lx  io.tlb_req.req.valid := s1_tlb_req_valid && !s1_tlb_evict
4380d32f713Shappy-lx  io.tlb_req.req.bits := s1_tlb_req_bits
4390d32f713Shappy-lx  io.tlb_req.req_kill := false.B
4400d32f713Shappy-lx  tlb_req_arb.io.out.ready := true.B
4410d32f713Shappy-lx
4420d32f713Shappy-lx  XSPerfAccumulate("s1_tlb_req_sent", io.tlb_req.req.valid)
4430d32f713Shappy-lx  XSPerfAccumulate("s1_tlb_req_evict", s1_tlb_req_valid && s1_tlb_evict)
4440d32f713Shappy-lx
4450d32f713Shappy-lx  // s2: get response from tlb
4460d32f713Shappy-lx  val s2_tlb_resp = io.tlb_req.resp
4470d32f713Shappy-lx  val s2_tlb_update_index = RegEnable(s1_tlb_req_index, s1_tlb_req_valid)
4480d32f713Shappy-lx  val s2_tlb_evict = s1_alloc && (s1_index === s2_tlb_update_index)
4490d32f713Shappy-lx  when(s2_tlb_resp.valid && !s2_tlb_evict) {
4500d32f713Shappy-lx    array(s2_tlb_update_index).is_vaddr := s2_tlb_resp.bits.miss
4510d32f713Shappy-lx
4520d32f713Shappy-lx    when(!s2_tlb_resp.bits.miss) {
4530d32f713Shappy-lx      array(s2_tlb_update_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s2_tlb_resp.bits.paddr.head(s2_tlb_resp.bits.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
4540d32f713Shappy-lx      when(s2_tlb_resp.bits.excp.head.pf.ld || s2_tlb_resp.bits.excp.head.af.ld) {
4550d32f713Shappy-lx        array(s2_tlb_update_index).invalidate()
4560d32f713Shappy-lx      }
4570d32f713Shappy-lx    }
4580d32f713Shappy-lx  }
4590d32f713Shappy-lx  s2_tlb_resp.ready := true.B
4600d32f713Shappy-lx
4610d32f713Shappy-lx  XSPerfAccumulate("s2_tlb_resp_valid", s2_tlb_resp.valid)
4620d32f713Shappy-lx  XSPerfAccumulate("s2_tlb_resp_evict", s2_tlb_resp.valid && s2_tlb_evict)
4630d32f713Shappy-lx  XSPerfAccumulate("s2_tlb_resp_miss", s2_tlb_resp.valid && !s2_tlb_evict && s2_tlb_resp.bits.miss)
4640d32f713Shappy-lx  XSPerfAccumulate("s2_tlb_resp_updated", s2_tlb_resp.valid && !s2_tlb_evict && !s2_tlb_resp.bits.miss)
4650d32f713Shappy-lx  XSPerfAccumulate("s2_tlb_resp_page_fault", s2_tlb_resp.valid && !s2_tlb_evict && !s2_tlb_resp.bits.miss && s2_tlb_resp.bits.excp.head.pf.ld)
4660d32f713Shappy-lx  XSPerfAccumulate("s2_tlb_resp_access_fault", s2_tlb_resp.valid && !s2_tlb_evict && !s2_tlb_resp.bits.miss && s2_tlb_resp.bits.excp.head.af.ld)
4670d32f713Shappy-lx
4680d32f713Shappy-lx  // l1 pf
4690d32f713Shappy-lx  // s0: generate prefetch req paddr per entry, arb them
4700d32f713Shappy-lx  val s0_pf_fire_vec = VecInit((0 until MLP_SIZE).map{case i => l1_pf_req_arb.io.in(i).fire})
4710d32f713Shappy-lx  val s1_pf_fire_vec = RegNext(s0_pf_fire_vec)
4720d32f713Shappy-lx
4730d32f713Shappy-lx  val s0_pf_fire = l1_pf_req_arb.io.out.fire
4740d32f713Shappy-lx  val s0_pf_index = OHToUInt(s0_pf_fire_vec.asUInt)
4750d32f713Shappy-lx  val s0_pf_candidate_oh = get_candidate_oh(l1_pf_req_arb.io.out.bits.paddr)
4760d32f713Shappy-lx
4770d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
4780d32f713Shappy-lx    val evict = s1_alloc && (s1_index === i.U)
4790d32f713Shappy-lx    l1_pf_req_arb.io.in(i).valid := array(i).can_send_pf() && (array(i).sink === SINK_L1) && !evict
4800d32f713Shappy-lx    l1_pf_req_arb.io.in(i).bits.paddr := array(i).get_pf_addr()
4810d32f713Shappy-lx    l1_pf_req_arb.io.in(i).bits.alias := array(i).alias
4820d32f713Shappy-lx    l1_pf_req_arb.io.in(i).bits.confidence := io.confidence
4830d32f713Shappy-lx    l1_pf_req_arb.io.in(i).bits.is_store := false.B
4840d32f713Shappy-lx    l1_pf_req_arb.io.in(i).bits.pf_source := array(i).source
4850d32f713Shappy-lx  }
4860d32f713Shappy-lx
4870d32f713Shappy-lx  when(s0_pf_fire) {
4880d32f713Shappy-lx    array(s0_pf_index).sent_vec := array(s0_pf_index).sent_vec | s0_pf_candidate_oh
4890d32f713Shappy-lx  }
4900d32f713Shappy-lx
4910d32f713Shappy-lx  assert(PopCount(s0_pf_fire_vec) <= 1.U, "s0_pf_fire_vec should be one-hot or empty")
4920d32f713Shappy-lx
4930d32f713Shappy-lx  // s1: send out to dcache
4940d32f713Shappy-lx  val s1_pf_valid = Reg(Bool())
4950d32f713Shappy-lx  val s1_pf_bits = RegEnable(l1_pf_req_arb.io.out.bits, l1_pf_req_arb.io.out.fire)
4960d32f713Shappy-lx  val s1_pf_index = RegEnable(s0_pf_index, l1_pf_req_arb.io.out.fire)
4970d32f713Shappy-lx  val s1_pf_candidate_oh = RegEnable(s0_pf_candidate_oh, l1_pf_req_arb.io.out.fire)
4980d32f713Shappy-lx  val s1_pf_evict = s1_alloc && (s1_index === s1_pf_index)
4990d32f713Shappy-lx  val s1_pf_update = s1_update && (s1_index === s1_pf_index)
5000d32f713Shappy-lx  val s1_pf_can_go = io.l1_req.ready && !s1_pf_evict && !s1_pf_update
5010d32f713Shappy-lx  val s1_pf_fire = s1_pf_valid && s1_pf_can_go
5020d32f713Shappy-lx
5030d32f713Shappy-lx  when(s1_pf_can_go) {
5040d32f713Shappy-lx    s1_pf_valid := false.B
5050d32f713Shappy-lx  }
5060d32f713Shappy-lx
5070d32f713Shappy-lx  when(l1_pf_req_arb.io.out.fire) {
5080d32f713Shappy-lx    s1_pf_valid := true.B
5090d32f713Shappy-lx  }
5100d32f713Shappy-lx
5110d32f713Shappy-lx  when(s1_pf_fire) {
5120d32f713Shappy-lx    array(s1_pf_index).bit_vec := array(s1_pf_index).bit_vec & ~s1_pf_candidate_oh
5130d32f713Shappy-lx  }
5140d32f713Shappy-lx
5150d32f713Shappy-lx  // FIXME: the logic is to long, add an extra pf pipe stage
5160d32f713Shappy-lx  io.l1_req.valid := s1_pf_valid && !s1_pf_evict && !s1_pf_update && (s1_pf_bits.paddr >= 0x80000000L.U) && io.enable
5170d32f713Shappy-lx  io.l1_req.bits := s1_pf_bits
5180d32f713Shappy-lx
5190d32f713Shappy-lx  l1_pf_req_arb.io.out.ready := s1_pf_can_go || !s1_pf_valid
5200d32f713Shappy-lx
5210d32f713Shappy-lx  assert(!((s1_alloc || s1_update) && s1_pf_fire && (s1_index === s1_pf_index)), "pf pipeline & enq pipeline bit_vec harzard!")
5220d32f713Shappy-lx
5230d32f713Shappy-lx  XSPerfAccumulate("s1_pf_valid", s1_pf_valid)
5240d32f713Shappy-lx  XSPerfAccumulate("s1_pf_block_by_pipe_unready", s1_pf_valid && !io.l1_req.ready)
5250d32f713Shappy-lx  XSPerfAccumulate("s1_pf_block_by_enq_alloc_harzard", s1_pf_valid && s1_pf_evict)
5260d32f713Shappy-lx  XSPerfAccumulate("s1_pf_block_by_enq_update_harzard", s1_pf_valid && s1_pf_update)
5270d32f713Shappy-lx  XSPerfAccumulate("s1_pf_fire", s1_pf_fire)
5280d32f713Shappy-lx
5290d32f713Shappy-lx  // l2 pf
5300d32f713Shappy-lx  // s0: generate prefetch req paddr per entry, arb them, sent out
5310d32f713Shappy-lx  io.l2_pf_addr.valid := l2_pf_req_arb.io.out.valid
5320d32f713Shappy-lx  io.l2_pf_addr.bits := l2_pf_req_arb.io.out.bits
5330d32f713Shappy-lx
5340d32f713Shappy-lx  l2_pf_req_arb.io.out.ready := true.B
5350d32f713Shappy-lx
5360d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
5370d32f713Shappy-lx    val evict = s1_alloc && (s1_index === i.U)
5380d32f713Shappy-lx    l2_pf_req_arb.io.in(i).valid := array(i).can_send_pf() && (array(i).sink === SINK_L2) && !evict
5390d32f713Shappy-lx    l2_pf_req_arb.io.in(i).bits.addr := array(i).get_pf_addr()
5400d32f713Shappy-lx    l2_pf_req_arb.io.in(i).bits.source := MuxLookup(array(i).source.value, MemReqSource.Prefetch2L2Unknown.id.U, Seq(
5410d32f713Shappy-lx      L1_HW_PREFETCH_STRIDE -> MemReqSource.Prefetch2L2Stride.id.U,
5420d32f713Shappy-lx      L1_HW_PREFETCH_STREAM -> MemReqSource.Prefetch2L2Stream.id.U
5430d32f713Shappy-lx    ))
5440d32f713Shappy-lx  }
5450d32f713Shappy-lx
5460d32f713Shappy-lx  when(l2_pf_req_arb.io.out.valid) {
5470d32f713Shappy-lx    array(l2_pf_req_arb.io.chosen).sent_vec := array(l2_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l2_pf_req_arb.io.out.bits.addr)
5480d32f713Shappy-lx  }
5490d32f713Shappy-lx
5500d32f713Shappy-lx  // last level cache pf
5510d32f713Shappy-lx  // s0: generate prefetch req paddr per entry, arb them, sent out
5520d32f713Shappy-lx  io.l3_pf_addr.valid := l3_pf_req_arb.io.out.valid
5530d32f713Shappy-lx  io.l3_pf_addr.bits := l3_pf_req_arb.io.out.bits
5540d32f713Shappy-lx
5550d32f713Shappy-lx  l3_pf_req_arb.io.out.ready := true.B
5560d32f713Shappy-lx
5570d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
5580d32f713Shappy-lx    val evict = s1_alloc && (s1_index === i.U)
5590d32f713Shappy-lx    l3_pf_req_arb.io.in(i).valid := array(i).can_send_pf() && (array(i).sink === SINK_L3) && !evict
5600d32f713Shappy-lx    l3_pf_req_arb.io.in(i).bits := array(i).get_pf_addr()
5610d32f713Shappy-lx  }
5620d32f713Shappy-lx
5630d32f713Shappy-lx  when(l3_pf_req_arb.io.out.valid) {
5640d32f713Shappy-lx    array(l3_pf_req_arb.io.chosen).sent_vec := array(l3_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l3_pf_req_arb.io.out.bits)
5650d32f713Shappy-lx  }
5660d32f713Shappy-lx
5670d32f713Shappy-lx  // reset meta to avoid multi-hit problem
5680d32f713Shappy-lx  for(i <- 0 until MLP_SIZE) {
5690d32f713Shappy-lx    when(reset.asBool || RegNext(io.flush)) {
5700d32f713Shappy-lx      array(i).reset(i)
5710d32f713Shappy-lx    }
5720d32f713Shappy-lx  }
5730d32f713Shappy-lx
5740d32f713Shappy-lx  XSPerfAccumulate("l2_prefetche_queue_busby", io.l2PfqBusy)
5750d32f713Shappy-lx  XSPerfHistogram("filter_active", PopCount(VecInit(array.map(_.can_send_pf())).asUInt), true.B, 0, MLP_SIZE, 1)
5760d32f713Shappy-lx  XSPerfHistogram("l1_filter_active", PopCount(VecInit(array.map(x => x.can_send_pf() && (x.sink === SINK_L1))).asUInt), true.B, 0, MLP_SIZE, 1)
5770d32f713Shappy-lx  XSPerfHistogram("l2_filter_active", PopCount(VecInit(array.map(x => x.can_send_pf() && (x.sink === SINK_L2))).asUInt), true.B, 0, MLP_SIZE, 1)
5780d32f713Shappy-lx  XSPerfHistogram("l3_filter_active", PopCount(VecInit(array.map(x => x.can_send_pf() && (x.sink === SINK_L3))).asUInt), true.B, 0, MLP_SIZE, 1)
5790d32f713Shappy-lx}
5800d32f713Shappy-lx
/** Top-level wiring of the L1 prefetcher.
  *
  * Instantiates one train filter per pattern kind (stream and stride), the
  * stream bit-vector array, the stride meta array and the multi-level prefetch
  * filter, then connects them:
  *
  *  - load training inputs are forwarded to the two train filters, qualified by `io.enable`;
  *  - each train filter feeds its pattern array through `train_req`;
  *  - the stride array and the stream array are cross-connected through the
  *    `stream_lookup_req` / `stream_lookup_resp` ports;
  *  - the prefetch requests of both arrays are merged (stream wins when both are
  *    valid) and handed to the multi-level prefetch filter;
  *  - the filter's L1 / L2 / L3 outputs drive `io.l1_req` / `io.l2_req` / `io.l3_req`.
  *
  * Training is qualified by `io.enable` only, whereas outgoing requests also
  * require `pf_ctrl.enable`. So with `pf_ctrl.enable` low the training and
  * filtering logic keeps running, but no request is reported as valid.
  */
class L1Prefetcher(implicit p: Parameters) extends BasePrefecher with HasStreamPrefetchHelper with HasStridePrefetchHelper {
  // Extra ports on top of the ones inherited from the base prefetcher.
  val pf_ctrl = IO(Input(new PrefetchControlBundle))
  val stride_train = IO(Flipped(Vec(exuParameters.LduCnt, ValidIO(new LdPrefetchTrainBundle()))))
  val l2PfqBusy = IO(Input(Bool()))

  // Sub-modules.
  val stride_train_filter = Module(new TrainFilter(STRIDE_FILTER_SIZE, "stride"))
  val stride_meta_array = Module(new StrideMetaArray)
  val stream_train_filter = Module(new TrainFilter(STREAM_FILTER_SIZE, "stream"))
  val stream_bit_vec_array = Module(new StreamBitVectorArray)
  val pf_queue_filter = Module(new MutiLevelPrefetchFilter)

  // Global controls fanned out to every sub-module below.
  val enable = io.enable
  val flush = pf_ctrl.flush

  // An outgoing request is only reported as valid when both enables are set.
  private def l1pfSendAllowed: Bool = enable && pf_ctrl.enable

  // Address floor applied to L2/L3 requests.
  // NOTE(review): the comparison is strict, so an address of exactly 0x80000000 is
  // rejected -- confirm this is intended.
  private def l1pfAboveAddrFloor(paddr: UInt): Bool = paddr > 0x80000000L.U

  // Shorthands for the two prefetch request sources.
  private def l1pfStreamReq = stream_bit_vec_array.io.prefetch_req
  private def l1pfStrideReq = stride_meta_array.io.prefetch_req

  // ---- training inputs --------------------------------------------------
  // Stream training takes the loads from io.ld_in.
  for (i <- stream_train_filter.io.ld_in.indices) {
    stream_train_filter.io.ld_in(i).valid := io.ld_in(i).valid && enable
    stream_train_filter.io.ld_in(i).bits := io.ld_in(i).bits
  }
  // Stride training takes the loads from the dedicated stride_train port.
  for (i <- stride_train_filter.io.ld_in.indices) {
    stride_train_filter.io.ld_in(i).valid := stride_train(i).valid && enable
    stride_train_filter.io.ld_in(i).bits := stride_train(i).bits
  }

  // ---- enable / flush fan-out -------------------------------------------
  stream_train_filter.io.enable := enable
  stream_train_filter.io.flush := flush
  stride_train_filter.io.enable := enable
  stride_train_filter.io.flush := flush
  stream_bit_vec_array.io.enable := enable
  stream_bit_vec_array.io.flush := flush
  stride_meta_array.io.enable := enable
  stride_meta_array.io.flush := flush
  pf_queue_filter.io.enable := enable
  pf_queue_filter.io.flush := flush

  // ---- pattern arrays ---------------------------------------------------
  // Only the stream array follows the externally supplied depth; the stride
  // array's depth input is tied to zero.
  stream_bit_vec_array.io.dynamic_depth := pf_ctrl.dynamic_depth
  stride_meta_array.io.dynamic_depth := 0.U

  stream_bit_vec_array.io.train_req <> stream_train_filter.io.train_req
  stride_meta_array.io.train_req <> stride_train_filter.io.train_req

  // Lookup channel between the stride array and the stream array.
  stride_meta_array.io.stream_lookup_req <> stream_bit_vec_array.io.stream_lookup_req
  stride_meta_array.io.stream_lookup_resp <> stream_bit_vec_array.io.stream_lookup_resp

  // ---- request merge ----------------------------------------------------
  // Either source makes the merged request valid; when both are valid in the
  // same cycle the stream payload is the one forwarded.
  pf_queue_filter.io.prefetch_req.valid := l1pfStreamReq.valid || l1pfStrideReq.valid
  pf_queue_filter.io.prefetch_req.bits := Mux(l1pfStreamReq.valid, l1pfStreamReq.bits, l1pfStrideReq.bits)

  // ---- multi-level filter side inputs -----------------------------------
  pf_queue_filter.io.tlb_req <> io.tlb_req
  pf_queue_filter.io.confidence := pf_ctrl.confidence
  pf_queue_filter.io.l2PfqBusy := l2PfqBusy
  // While pf_ctrl.enable is low the filter always sees its L1 request accepted;
  // otherwise it follows the real downstream ready.
  pf_queue_filter.io.l1_req.ready := io.l1_req.ready || !pf_ctrl.enable

  // ---- outgoing requests ------------------------------------------------
  io.l1_req.valid := pf_queue_filter.io.l1_req.valid && l1pfSendAllowed
  io.l1_req.bits := pf_queue_filter.io.l1_req.bits

  io.l2_req.valid := pf_queue_filter.io.l2_pf_addr.valid &&
    l1pfAboveAddrFloor(pf_queue_filter.io.l2_pf_addr.bits.addr) &&
    l1pfSendAllowed
  io.l2_req.bits := pf_queue_filter.io.l2_pf_addr.bits

  io.l3_req.valid := pf_queue_filter.io.l3_pf_addr.valid &&
    l1pfAboveAddrFloor(pf_queue_filter.io.l3_pf_addr.bits) &&
    l1pfSendAllowed
  io.l3_req.bits := pf_queue_filter.io.l3_pf_addr.bits
}