package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.mem.L1PrefetchReq
import xiangshan.mem.Bundles.LsPrefetchTrainBundle
import xiangshan.mem.trace._
import xiangshan.mem.L1PrefetchSource
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._

/**
  * Shared constants and address-slicing helpers for the L1 prefetcher.
  *
  * A virtual address is viewed as
  * {{{
  *   | region tag | region bits | block offset |
  * }}}
  * where a region is REGION_SIZE bytes and holds BIT_VEC_WITDH cache blocks.
  */
trait HasL1PrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // region related
  val REGION_SIZE = 1024
  val PAGE_OFFSET = 12
  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  // number of cache blocks in one region (identifier spelling kept: it is referenced elsewhere)
  val BIT_VEC_WITDH = REGION_SIZE / dcacheParameters.blockBytes
  val REGION_BITS = log2Up(BIT_VEC_WITDH)
  val REGION_TAG_OFFSET = BLOCK_OFFSET + REGION_BITS
  val REGION_TAG_BITS = VAddrBits - BLOCK_OFFSET - REGION_BITS

  // hash related
  val VADDR_HASH_WIDTH = 5
  val BLK_ADDR_RAW_WIDTH = 10
  val HASH_TAG_WIDTH = VADDR_HASH_WIDTH + BLK_ADDR_RAW_WIDTH

  // capacity related
  val MLP_SIZE = 32
  val MLP_L1_SIZE = 16
  val MLP_L2L3_SIZE = MLP_SIZE - MLP_L1_SIZE

  // prefetch sink related
  val SINK_BITS = 2
  def SINK_L1 = "b00".U
  def SINK_L2 = "b01".U
  def SINK_L3 = "b10".U

  // vaddr: | region tag | region bits | block offset |
  /** Bits of `vaddr` above the region bits. `vaddr` must be exactly VAddrBits wide. */
  def get_region_tag(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(vaddr.getWidth - 1, REGION_TAG_OFFSET)
  }

  /** Index of the cache block inside its region. `vaddr` must be exactly VAddrBits wide. */
  def get_region_bits(vaddr: UInt) = {
    require(vaddr.getWidth == VAddrBits)
    vaddr(REGION_TAG_OFFSET - 1, BLOCK_OFFSET)
  }

  /** Drops the block-offset bits of `x`. */
  def block_addr(x: UInt): UInt = {
    x(x.getWidth - 1, BLOCK_OFFSET)
  }

  /** XOR-folds the low 3 * VADDR_HASH_WIDTH bits of `x` into VADDR_HASH_WIDTH bits. */
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  /**
    * HASH_TAG_WIDTH-bit tag: the low BLK_ADDR_RAW_WIDTH bits of `x` kept raw,
    * concatenated below the folded hash of the next 3 * VADDR_HASH_WIDTH bits.
    */
  def pc_hash_tag(x: UInt): UInt = {
    val low = x(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = x(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** Same tag construction as [[pc_hash_tag]], applied to the block address of `x`. */
  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** Same tag construction as [[pc_hash_tag]], applied directly to a region tag. */
  def region_hash_tag(region_tag: UInt): UInt = {
    val low = region_tag(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = region_tag(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** Rebuilds a block address by concatenating a region tag with the in-region block index. */
  def region_to_block_addr(region_tag: UInt, region_bits: UInt): UInt = {
    Cat(region_tag, region_bits)
  }

  /** One-hot encoding of the in-region block index of `x`. `x` must be exactly PAddrBits wide. */
  def get_candidate_oh(x: UInt): UInt = {
    require(x.getWidth == PAddrBits)
    UIntToOH(x(REGION_BITS + BLOCK_OFFSET - 1, BLOCK_OFFSET))
  }

  /**
    * Elaboration-time helper: binary string of `n` without leading zeros.
    * Only used here to build perf-counter names; callers pass non-negative values.
    */
  def toBinary(n: Int): String = n match {
    case 0|1 => s"$n"
    case _ => s"${toBinary(n/2)}${n%2}"
  }
}

/** Helper that orders incoming train requests by program order (robIdx). */
trait HasTrainFilterHelper extends HasCircularQueuePtrHelper {
  /**
    * Returns `source` with older requests (per `isBefore` on robIdx) moved to lower indices.
    *
    *  - 1 source : returned unchanged.
    *  - 2 sources: combinational swap, performed only when both sources are valid.
    *  - 3 sources: three compare-and-swap steps; each step's result (including the
    *               returned Vec) is held in a `Reg`, so this path is registered
    *               rather than combinational.
    *  - 4 or more: rejected at elaboration time.
    */
  def reorder[T <: LsPrefetchTrainBundle](source: Vec[ValidIO[T]]): Vec[ValidIO[T]] = {
    if(source.length == 1) {
      source
    }else if(source.length == 2) {
      val source_v = source.map(_.valid)
      val res = Wire(source.cloneType)
      // source 1 is older than source 0 (only when source0/1 are both valid)
      val source_1_older = Mux(Cat(source_v).andR,
        isBefore(source(1).bits.uop.robIdx, source(0).bits.uop.robIdx),
        false.B
      )
      when(source_1_older) {
        res(0) := source(1)
        res(1) := source(0)
      }.otherwise {
        res := source
      }

      res
    }else if(source.length == 3) {
      // TODO: generalize
      val res_0_1 = Reg(source.cloneType)
      val res_1_2 = Reg(source.cloneType)
      val res = Reg(source.cloneType)

      // step 1: order elements 0 and 1
      val tmp = reorder(VecInit(source.slice(0, 2)))
      res_0_1(0) := tmp(0)
      res_0_1(1) := tmp(1)
      res_0_1(2) := source(2)
      // step 2: order elements 1 and 2
      val tmp_1 = reorder(VecInit(res_0_1.slice(1, 3)))
      res_1_2(0) := res_0_1(0)
      res_1_2(1) := tmp_1(0)
      res_1_2(2) := tmp_1(1)
      // step 3: order elements 0 and 1 again
      val tmp_2 = reorder(VecInit(res_1_2.slice(0, 2)))
      res(0) := tmp_2(0)
      res(1) := tmp_2(1)
      res(2) := res_1_2(2)

      res
    }else {
      require(false, "for now, 4 or more sources are invalid")
      source
    }
  }
}

// get prefetch train reqs from `backendParams.LduCnt` load pipelines (up to `backendParams.LduCnt`/cycle)
// filter by cache line address, send out train req to stride (up to 1 req/cycle)
/**
  * Queue of train requests, de-duplicated by `block_hash_tag` of the request vaddr.
  *
  * @param size number of queue entries; must be at least `backendParams.LduCnt`
  * @param name prefix used for the perf-counter names
  */
class TrainFilter(size: Int, name: String)(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    val enable = Input(Bool())
    val flush = Input(Bool())
    // train input, only from load for now
    val ld_in = Flipped(Vec(backendParams.LduCnt, ValidIO(new LsPrefetchTrainBundle())))
    // filter out
    val train_req = DecoupledIO(new PrefetchReqBundle())
  })

  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => size ){}
  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  // entry payloads carry no reset value; only the valid bits are reset
  val entries = Reg(Vec(size, new PrefetchReqBundle))
  val valids = RegInit(VecInit(Seq.fill(size){ (false.B) }))

  // enq
  val enqLen = backendParams.LduCnt
  // enqPtrExt(k) is initialised to k, giving one candidate slot per possible allocation this cycle
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(size >= enqLen)

  val ld_in_reordered = reorder(io.ld_in)
  val reqs_l = ld_in_reordered.map(_.bits.toPrefetchReqBundle())
  val reqs_vl = ld_in_reordered.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_l(i)
    val req_v = reqs_vl(i)
    // slot for request i = number of earlier requests in this cycle that need an entry
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    // already present in the queue (compared by hashed block tag)
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    // duplicates an earlier valid request arriving in the same cycle
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_l.zip(reqs_vl).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    // NOTE(review): `allocPtr >= deqPtrExt` presumably acts as the "slot is free" check
    // via CircularQueuePtr ordering -- confirm against the CircularQueuePtr definition
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt && io.enable

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  // all enqueue pointers advance together by the number of entries written this cycle
  enqPtrExt.foreach{case x => when(canAlloc.asUInt.orR) {x := x + allocNum} }

  // deq
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid && io.enable
        io.train_req.bits := entry
      }
    }
  }

  when(io.train_req.fire) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  // flush takes effect one cycle after io.flush is asserted; being the last
  // connection, it overrides the enq/deq updates above
  when(RegNext(io.flush)) {
    valids.foreach {case valid => valid := false.B}
    (0 until enqLen).map {case i => enqPtrExt(i) := i.U.asTypeOf(new Ptr)}
    deqPtrExt := 0.U.asTypeOf(new Ptr)
  }

  XSPerfAccumulate(s"${name}_train_filter_full", PopCount(valids) === size.U)
  XSPerfAccumulate(s"${name}_train_filter_half", PopCount(valids) >= (size / 2).U)
  XSPerfAccumulate(s"${name}_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vl)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)

  XSPerfAccumulate(s"${name}_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate(s"${name}_train_filter_deq", io.train_req.fire)
  // one counter per possible valid-bit pattern of the enqueue ports
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"${name}_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"${name}_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"${name}_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}

/**
  * One entry of the multi-level prefetch filter: a region, the blocks inside it
  * that are still to be prefetched (`bit_vec`), the blocks already sent
  * (`sent_vec`) and the target cache level (`sink`).
  *
  * The entry carries no valid bit of its own; the methods that depend on
  * validity take it as an argument.
  */
class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1PrefetchHelper {
  val tag = UInt(HASH_TAG_WIDTH.W)
  val region = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  // NOTE: l1 will not use sent_vec, for making more prefetch reqs to l1 dcache
  val sent_vec = UInt(BIT_VEC_WITDH.W)
  val sink = UInt(SINK_BITS.W)
  val alias = UInt(2.W)
  val is_vaddr = Bool()
  val source = new L1PrefetchSource()
  val debug_va_region = UInt(REGION_TAG_BITS.W)

  /** Drives every field to its idle value; `region` and `tag` are derived from `index`. */
  def reset(index: Int) = {
    tag := region_hash_tag(index.U)
    region := index.U
    bit_vec := 0.U
    sent_vec := 0.U
    sink := SINK_L1
    alias := 0.U
    is_vaddr := false.B
    source.value := L1_HW_PREFETCH_NULL
    debug_va_region := 0.U
  }

  /** True when both valid flags are set and this entry's tag equals `new_tag` (HASH_TAG_WIDTH bits). */
  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
    require(new_tag.getWidth == HASH_TAG_WIDTH)
    (tag === new_tag) && valid1 && valid2
  }

  /**
    * Merges a new request into this entry.
    *
    * Normally the new bits are OR-ed into `bit_vec`. When `update_sink` is
    * numerically lower than the stored `sink`, the already-sent bits are first
    * removed from `bit_vec` and `sink` is lowered to `update_sink`.
    */
  def update(update_bit_vec: UInt, update_sink: UInt) = {
    bit_vec := bit_vec | update_bit_vec
    when(update_sink < sink) {
      bit_vec := (bit_vec & ~sent_vec) | update_bit_vec
      sink := update_sink
    }

    assert(PopCount(update_bit_vec) >= 1.U, "update vector should contain at least one valid bit")
  }

  /**
    * True when the entry is valid, holds a translated address (`!is_vaddr`) and
    * has a pending block: any `bit_vec` bit for SINK_L1, otherwise any
    * `bit_vec` bit not yet in `sent_vec`.
    */
  def can_send_pf(valid: Bool): Bool = {
    Mux(
      sink === SINK_L1,
      !is_vaddr && bit_vec.orR,
      !is_vaddr && (bit_vec & ~sent_vec).orR
    ) && valid
  }

  /** Replacement hint. Note the "all sent" comparison goes through RegNext, so it lags `sent_vec` by one cycle. */
  def may_be_replace(valid: Bool): Bool = {
    // either invalid or has sent out all reqs out
    !valid || RegNext(PopCount(sent_vec) === BIT_VEC_WITDH.U)
  }

  /** Address of the next block to prefetch: `region`, lowest pending block index, zeroed block offset. */
  def get_pf_addr(): UInt = {
    require(PAddrBits <= VAddrBits)
    require((region.getWidth + REGION_BITS + BLOCK_OFFSET) == VAddrBits)

    val candidate = Mux(
      sink === SINK_L1,
      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
    )
    Cat(region, candidate, 0.U(BLOCK_OFFSET.W))
  }

  /** Same candidate selection as [[get_pf_addr]], built on `debug_va_region` instead of `region`. */
  def get_pf_debug_vaddr(): UInt = {
    val candidate = Mux(
      sink === SINK_L1,
      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
    )
    Cat(debug_va_region, candidate, 0.U(BLOCK_OFFSET.W))
  }

  /** Region-aligned address used for the TLB request: `region` followed by REGION_TAG_OFFSET zero bits. */
  def get_tlb_va(): UInt = {
    require((region.getWidth + REGION_TAG_OFFSET) == VAddrBits)
    Cat(region, 0.U(REGION_TAG_OFFSET.W))
  }

  /**
    * Builds a fresh entry from a stream prefetch request. The entry starts with
    * `is_vaddr` set and an empty `sent_vec`.
    */
  def fromStreamPrefetchReqBundle(x : StreamPrefetchReqBundle): MLPReqFilterBundle = {
    require(PAGE_OFFSET >= REGION_TAG_OFFSET, "region is greater than 4k, alias bit may be incorrect")

    val res = Wire(new MLPReqFilterBundle)
    res.tag := region_hash_tag(x.region)
    res.region := x.region
    res.bit_vec := x.bit_vec
    res.sent_vec := 0.U
    res.sink := x.sink
    res.is_vaddr := true.B
    res.source := x.source
    // two region bits starting at the page-offset boundary (address bits PAGE_OFFSET+1 .. PAGE_OFFSET)
    res.alias := x.region(PAGE_OFFSET - REGION_TAG_OFFSET + 1, PAGE_OFFSET - REGION_TAG_OFFSET)
    res.debug_va_region := x.region

    res
  }

  /** Stops the entry from issuing further prefetch or TLB requests. */
  def invalidate() = {
    // disable sending pf req
    when(sink === SINK_L1) {
      bit_vec := 0.U(BIT_VEC_WITDH.W)
    }.otherwise {
      sent_vec := ~(0.U(BIT_VEC_WITDH.W))
    }
    // disable sending tlb req
    is_vaddr := false.B
  }
}

// there are 5 independent pipelines inside
// 1. prefetch enqueue
// 2. tlb request
// 3. actual l1 prefetch
// 4.
actual l2 prefetch 3660d32f713Shappy-lx// 5. actual l3 prefetch 3670d32f713Shappy-lxclass MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper { 3680d32f713Shappy-lx val io = IO(new XSBundle { 3690d32f713Shappy-lx val enable = Input(Bool()) 3700d32f713Shappy-lx val flush = Input(Bool()) 37120e09ab1Shappy-lx val l1_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle)) 37220e09ab1Shappy-lx val l2_l3_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle)) 3730d32f713Shappy-lx val tlb_req = new TlbRequestIO(nRespDups = 2) 37425a80bceSYanqin Li val pmp_resp = Flipped(new PMPRespBundle()) 3750d32f713Shappy-lx val l1_req = DecoupledIO(new L1PrefetchReq()) 3760d32f713Shappy-lx val l2_pf_addr = ValidIO(new L2PrefetchReq()) 3770d32f713Shappy-lx val l3_pf_addr = ValidIO(UInt(PAddrBits.W)) // TODO: l3 pf source 3780d32f713Shappy-lx val confidence = Input(UInt(1.W)) 3790d32f713Shappy-lx val l2PfqBusy = Input(Bool()) 3800d32f713Shappy-lx }) 3810d32f713Shappy-lx 38220e09ab1Shappy-lx val l1_array = Reg(Vec(MLP_L1_SIZE, new MLPReqFilterBundle)) 38320e09ab1Shappy-lx val l2_array = Reg(Vec(MLP_L2L3_SIZE, new MLPReqFilterBundle)) 38470eea123SYanqin Li val l1_valids = RegInit(VecInit(Seq.fill(MLP_L1_SIZE)(false.B))) 38570eea123SYanqin Li val l2_valids = RegInit(VecInit(Seq.fill(MLP_L2L3_SIZE)(false.B))) 38670eea123SYanqin Li 38770eea123SYanqin Li def _invalid(e: MLPReqFilterBundle, v: Bool): Unit = { 38870eea123SYanqin Li v := false.B 38970eea123SYanqin Li e.invalidate() 39070eea123SYanqin Li } 39170eea123SYanqin Li 39270eea123SYanqin Li def invalid_array(i: UInt, isL2: Boolean): Unit = { 39370eea123SYanqin Li if (isL2) { 39470eea123SYanqin Li _invalid(l2_array(i), l2_valids(i)) 39570eea123SYanqin Li } else { 39670eea123SYanqin Li _invalid(l1_array(i), l1_valids(i)) 39770eea123SYanqin Li } 39870eea123SYanqin Li } 39970eea123SYanqin Li 40070eea123SYanqin Li def _reset(e: MLPReqFilterBundle, v: Bool, idx: Int): Unit = { 40170eea123SYanqin 
Li v := false.B 40270eea123SYanqin Li //only need to reset control signals for firendly area 40370eea123SYanqin Li // e.reset(idx) 40470eea123SYanqin Li } 40570eea123SYanqin Li 40670eea123SYanqin Li 40770eea123SYanqin Li def reset_array(i: Int, isL2: Boolean): Unit = { 40870eea123SYanqin Li if(isL2){ 40970eea123SYanqin Li _reset(l2_array(i), l2_valids(i), i) 41070eea123SYanqin Li }else{ 41170eea123SYanqin Li _reset(l1_array(i), l1_valids(i), i) 41270eea123SYanqin Li } 41370eea123SYanqin Li } 41470eea123SYanqin Li 41520e09ab1Shappy-lx val l1_replacement = new ValidPseudoLRU(MLP_L1_SIZE) 41620e09ab1Shappy-lx val l2_replacement = new ValidPseudoLRU(MLP_L2L3_SIZE) 4170d32f713Shappy-lx val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, MLP_SIZE)) 41820e09ab1Shappy-lx val l1_pf_req_arb = Module(new RRArbiterInit(new Bundle { 41920e09ab1Shappy-lx val req = new L1PrefetchReq 42020e09ab1Shappy-lx val debug_vaddr = UInt(VAddrBits.W) 42120e09ab1Shappy-lx }, MLP_L1_SIZE)) 42220e09ab1Shappy-lx val l2_pf_req_arb = Module(new RRArbiterInit(new Bundle { 42320e09ab1Shappy-lx val req = new L2PrefetchReq 42420e09ab1Shappy-lx val debug_vaddr = UInt(VAddrBits.W) 42520e09ab1Shappy-lx }, MLP_L2L3_SIZE)) 42620e09ab1Shappy-lx val l3_pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), MLP_L2L3_SIZE)) 4270d32f713Shappy-lx 42870eea123SYanqin Li val l1_opt_replace_vec = VecInit(l1_array.zip(l1_valids).map{case (e, v) => e.may_be_replace(v)}) 42970eea123SYanqin Li val l2_opt_replace_vec = VecInit(l2_array.zip(l2_valids).map{case (e, v) => e.may_be_replace(v)}) 43020e09ab1Shappy-lx // if we have something to replace, then choose it, otherwise follow the plru manner 43120e09ab1Shappy-lx val l1_real_replace_vec = Mux(Cat(l1_opt_replace_vec).orR, l1_opt_replace_vec, VecInit(Seq.fill(MLP_L1_SIZE)(true.B))) 43220e09ab1Shappy-lx val l2_real_replace_vec = Mux(Cat(l2_opt_replace_vec).orR, l2_opt_replace_vec, VecInit(Seq.fill(MLP_L2L3_SIZE)(true.B))) 43320e09ab1Shappy-lx 43420e09ab1Shappy-lx // 
l1 pf req enq 4350d32f713Shappy-lx // s0: hash tag match 43620e09ab1Shappy-lx val s0_l1_can_accept = Wire(Bool()) 43720e09ab1Shappy-lx val s0_l1_valid = io.l1_prefetch_req.valid && s0_l1_can_accept 43820e09ab1Shappy-lx val s0_l1_region = io.l1_prefetch_req.bits.region 43920e09ab1Shappy-lx val s0_l1_region_hash = region_hash_tag(s0_l1_region) 44070eea123SYanqin Li val s0_l1_match_vec = l1_array.zip(l1_valids).map{ case (e, v) => e.tag_match(v, s0_l1_valid, s0_l1_region_hash)} 44120e09ab1Shappy-lx val s0_l1_hit = VecInit(s0_l1_match_vec).asUInt.orR 44220e09ab1Shappy-lx val s0_l1_index = Wire(UInt(log2Up(MLP_L1_SIZE).W)) 44320e09ab1Shappy-lx val s0_l1_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l1_prefetch_req.bits) 4440d32f713Shappy-lx 44520e09ab1Shappy-lx s0_l1_index := Mux(s0_l1_hit, OHToUInt(VecInit(s0_l1_match_vec).asUInt), l1_replacement.way(l1_real_replace_vec.reverse)._2) 44620e09ab1Shappy-lx 44720e09ab1Shappy-lx when(s0_l1_valid) { 44820e09ab1Shappy-lx l1_replacement.access(s0_l1_index) 4490d32f713Shappy-lx } 4500d32f713Shappy-lx 45120e09ab1Shappy-lx assert(!s0_l1_valid || PopCount(VecInit(s0_l1_match_vec)) <= 1.U, "req region should match no more than 1 entry") 4520d32f713Shappy-lx 45320e09ab1Shappy-lx XSPerfAccumulate("s0_l1_enq_fire", s0_l1_valid) 45420e09ab1Shappy-lx XSPerfAccumulate("s0_l1_enq_valid", io.l1_prefetch_req.valid) 45520e09ab1Shappy-lx XSPerfAccumulate("s0_l1_cannot_enq", io.l1_prefetch_req.valid && !s0_l1_can_accept) 4560d32f713Shappy-lx 4570d32f713Shappy-lx // s1: alloc or update 45820e09ab1Shappy-lx val s1_l1_valid = RegNext(s0_l1_valid) 45920e09ab1Shappy-lx val s1_l1_region = RegEnable(s0_l1_region, s0_l1_valid) 46020e09ab1Shappy-lx val s1_l1_region_hash = RegEnable(s0_l1_region_hash, s0_l1_valid) 46120e09ab1Shappy-lx val s1_l1_hit = RegEnable(s0_l1_hit, s0_l1_valid) 46220e09ab1Shappy-lx val s1_l1_index = RegEnable(s0_l1_index, s0_l1_valid) 46320e09ab1Shappy-lx val s1_l1_prefetch_req = 
RegEnable(s0_l1_prefetch_req, s0_l1_valid) 46420e09ab1Shappy-lx val s1_l1_alloc = s1_l1_valid && !s1_l1_hit 46520e09ab1Shappy-lx val s1_l1_update = s1_l1_valid && s1_l1_hit 46620e09ab1Shappy-lx s0_l1_can_accept := !(s1_l1_valid && s1_l1_alloc && (s0_l1_region_hash === s1_l1_region_hash)) 4670d32f713Shappy-lx 46820e09ab1Shappy-lx when(s1_l1_alloc) { 46970eea123SYanqin Li l1_valids(s1_l1_index) := true.B 47020e09ab1Shappy-lx l1_array(s1_l1_index) := s1_l1_prefetch_req 47120e09ab1Shappy-lx }.elsewhen(s1_l1_update) { 47220e09ab1Shappy-lx l1_array(s1_l1_index).update( 47320e09ab1Shappy-lx update_bit_vec = s1_l1_prefetch_req.bit_vec, 47420e09ab1Shappy-lx update_sink = s1_l1_prefetch_req.sink 4750d32f713Shappy-lx ) 4760d32f713Shappy-lx } 4770d32f713Shappy-lx 47820e09ab1Shappy-lx XSPerfAccumulate("s1_l1_enq_valid", s1_l1_valid) 47920e09ab1Shappy-lx XSPerfAccumulate("s1_l1_enq_alloc", s1_l1_alloc) 48020e09ab1Shappy-lx XSPerfAccumulate("s1_l1_enq_update", s1_l1_update) 48120e09ab1Shappy-lx XSPerfAccumulate("l1_hash_conflict", s0_l1_valid && RegNext(s1_l1_valid) && (s0_l1_region =/= RegNext(s1_l1_region)) && (s0_l1_region_hash === RegNext(s1_l1_region_hash))) 48270eea123SYanqin Li XSPerfAccumulate("s1_l1_enq_evict_useful_entry", s1_l1_alloc && l1_array(s1_l1_index).can_send_pf(l1_valids(s1_l1_index))) 4830d32f713Shappy-lx 48420e09ab1Shappy-lx // l2 l3 pf req enq 48520e09ab1Shappy-lx // s0: hash tag match 48620e09ab1Shappy-lx val s0_l2_can_accept = Wire(Bool()) 48720e09ab1Shappy-lx val s0_l2_valid = io.l2_l3_prefetch_req.valid && s0_l2_can_accept 48820e09ab1Shappy-lx val s0_l2_region = io.l2_l3_prefetch_req.bits.region 48920e09ab1Shappy-lx val s0_l2_region_hash = region_hash_tag(s0_l2_region) 49070eea123SYanqin Li val s0_l2_match_vec = l2_array.zip(l2_valids).map{ case (e, v) => e.tag_match(v, s0_l2_valid, s0_l2_region_hash) } 49120e09ab1Shappy-lx val s0_l2_hit = VecInit(s0_l2_match_vec).asUInt.orR 49220e09ab1Shappy-lx val s0_l2_index = Wire(UInt(log2Up(MLP_L2L3_SIZE).W)) 
49320e09ab1Shappy-lx val s0_l2_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l2_l3_prefetch_req.bits) 49420e09ab1Shappy-lx 49520e09ab1Shappy-lx s0_l2_index := Mux(s0_l2_hit, OHToUInt(VecInit(s0_l2_match_vec).asUInt), l2_replacement.way(l2_real_replace_vec.reverse)._2) 49620e09ab1Shappy-lx 49720e09ab1Shappy-lx when(s0_l2_valid) { 49820e09ab1Shappy-lx l2_replacement.access(s0_l2_index) 49920e09ab1Shappy-lx } 50020e09ab1Shappy-lx 50120e09ab1Shappy-lx assert(!s0_l2_valid || PopCount(VecInit(s0_l2_match_vec)) <= 1.U, "req region should match no more than 1 entry") 50220e09ab1Shappy-lx 50320e09ab1Shappy-lx XSPerfAccumulate("s0_l2_enq_fire", s0_l2_valid) 50420e09ab1Shappy-lx XSPerfAccumulate("s0_l2_enq_valid", io.l2_l3_prefetch_req.valid) 50520e09ab1Shappy-lx XSPerfAccumulate("s0_l2_cannot_enq", io.l2_l3_prefetch_req.valid && !s0_l2_can_accept) 50620e09ab1Shappy-lx 50720e09ab1Shappy-lx // s1: alloc or update 50820e09ab1Shappy-lx val s1_l2_valid = RegNext(s0_l2_valid) 50920e09ab1Shappy-lx val s1_l2_region = RegEnable(s0_l2_region, s0_l2_valid) 51020e09ab1Shappy-lx val s1_l2_region_hash = RegEnable(s0_l2_region_hash, s0_l2_valid) 51120e09ab1Shappy-lx val s1_l2_hit = RegEnable(s0_l2_hit, s0_l2_valid) 51220e09ab1Shappy-lx val s1_l2_index = RegEnable(s0_l2_index, s0_l2_valid) 51320e09ab1Shappy-lx val s1_l2_prefetch_req = RegEnable(s0_l2_prefetch_req, s0_l2_valid) 51420e09ab1Shappy-lx val s1_l2_alloc = s1_l2_valid && !s1_l2_hit 51520e09ab1Shappy-lx val s1_l2_update = s1_l2_valid && s1_l2_hit 51620e09ab1Shappy-lx s0_l2_can_accept := !(s1_l2_valid && s1_l2_alloc && (s0_l2_region_hash === s1_l2_region_hash)) 51720e09ab1Shappy-lx 51820e09ab1Shappy-lx when(s1_l2_alloc) { 51970eea123SYanqin Li l2_valids(s1_l2_index) := true.B 52020e09ab1Shappy-lx l2_array(s1_l2_index) := s1_l2_prefetch_req 52120e09ab1Shappy-lx }.elsewhen(s1_l2_update) { 52220e09ab1Shappy-lx l2_array(s1_l2_index).update( 52320e09ab1Shappy-lx update_bit_vec = s1_l2_prefetch_req.bit_vec, 
52420e09ab1Shappy-lx update_sink = s1_l2_prefetch_req.sink 52520e09ab1Shappy-lx ) 52620e09ab1Shappy-lx } 52720e09ab1Shappy-lx 52820e09ab1Shappy-lx XSPerfAccumulate("s1_l2_enq_valid", s1_l2_valid) 52920e09ab1Shappy-lx XSPerfAccumulate("s1_l2_enq_alloc", s1_l2_alloc) 53020e09ab1Shappy-lx XSPerfAccumulate("s1_l2_enq_update", s1_l2_update) 53120e09ab1Shappy-lx XSPerfAccumulate("l2_hash_conflict", s0_l2_valid && RegNext(s1_l2_valid) && (s0_l2_region =/= RegNext(s1_l2_region)) && (s0_l2_region_hash === RegNext(s1_l2_region_hash))) 53270eea123SYanqin Li XSPerfAccumulate("s1_l2_enq_evict_useful_entry", s1_l2_alloc && l2_array(s1_l2_index).can_send_pf(l2_valids(s1_l2_index))) 53320e09ab1Shappy-lx 53420e09ab1Shappy-lx // stream pf debug db here 53520e09ab1Shappy-lx // Hit: 53620e09ab1Shappy-lx // now seens only pending = (region_bits & ~filter_bits) are the peeding request 53720e09ab1Shappy-lx // if a PfGen comes, new added request can be new_req = PfGen.region_bits & ~(pending) 53820e09ab1Shappy-lx // Alloc: 53920e09ab1Shappy-lx // new_req = PfGen.region_bits 54020e09ab1Shappy-lx val stream_pf_trace_debug_table = ChiselDB.createTable("StreamPFTrace" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceInEntry, basicDB = false) 54120e09ab1Shappy-lx for (i <- 0 until BIT_VEC_WITDH) { 54220e09ab1Shappy-lx // l1 enq log 54320e09ab1Shappy-lx val hit_entry = l1_array(s0_l1_index) 54420e09ab1Shappy-lx val new_req = Mux( 54520e09ab1Shappy-lx s0_l1_hit, 54620e09ab1Shappy-lx io.l1_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec), 54720e09ab1Shappy-lx io.l1_prefetch_req.bits.bit_vec 54820e09ab1Shappy-lx ) 54920e09ab1Shappy-lx val log_enable = s0_l1_valid && new_req(i) && (io.l1_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM) 55020e09ab1Shappy-lx val log_data = Wire(new StreamPFTraceInEntry) 55120e09ab1Shappy-lx 55220e09ab1Shappy-lx log_data.TriggerPC := io.l1_prefetch_req.bits.trigger_pc 55320e09ab1Shappy-lx log_data.TriggerVaddr := io.l1_prefetch_req.bits.trigger_va 
// NOTE: continuation of a hardware module body (presumably MutiLevelPrefetchFilter) whose header, and the
// opening of the loop closed just below, lie above this chunk.
    log_data.PFVaddr := Cat(s0_l1_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
    log_data.PFSink := s0_l1_prefetch_req.sink

    stream_pf_trace_debug_table.log(
      data = log_data,
      en = log_enable,
      site = "StreamPFTrace",
      clock = clock,
      reset = reset
    )
  }
  // One trace record per block position of the region: bit i of the (new) bit vector selects whether
  // block i of the l2/l3 request is logged this cycle.
  for (i <- 0 until BIT_VEC_WITDH) {
    // l2 l3 enq log
    val hit_entry = l2_array(s0_l2_index)
    // On a hit only the bits that are not already recorded in the hit entry count as new requests.
    val new_req = Mux(
      s0_l2_hit,
      io.l2_l3_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
      io.l2_l3_prefetch_req.bits.bit_vec
    )
    // Only requests whose source is the stream prefetcher are traced.
    val log_enable = s0_l2_valid && new_req(i) && (io.l2_l3_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
    val log_data = Wire(new StreamPFTraceInEntry)

    log_data.TriggerPC := io.l2_l3_prefetch_req.bits.trigger_pc
    log_data.TriggerVaddr := io.l2_l3_prefetch_req.bits.trigger_va
    // Prefetch vaddr = region tag | block index inside the region | zeroed block offset.
    log_data.PFVaddr := Cat(s0_l2_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
    log_data.PFSink := s0_l2_prefetch_req.sink

    stream_pf_trace_debug_table.log(
      data = log_data,
      en = log_enable,
      site = "StreamPFTrace",
      clock = clock,
      reset = reset
    )
  }

  // tlb req
  // The tlb pipeline uses one flat index space over both arrays: indices [0, MLP_L1_SIZE) address
  // l1_array, indices [MLP_L1_SIZE, MLP_SIZE) address l2_array at (index - MLP_L1_SIZE).
  // s0: arb all tlb reqs
  val s0_tlb_fire_vec = VecInit((0 until MLP_SIZE).map{case i => tlb_req_arb.io.in(i).fire})
  // Delayed copies of the fire vector, one per following stage (assumes GatedValidRegNext acts as a
  // one-cycle delay register -- TODO confirm against its definition).
  val s1_tlb_fire_vec = GatedValidRegNext(s0_tlb_fire_vec)
  val s2_tlb_fire_vec = GatedValidRegNext(s1_tlb_fire_vec)
  val s3_tlb_fire_vec = GatedValidRegNext(s2_tlb_fire_vec)
  // An entry may only request translation again once its previous request has left s1..s3.
  val not_tlbing_vec = VecInit((0 until MLP_SIZE).map{case i =>
    !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !s3_tlb_fire_vec(i)
  })

  for(i <- 0 until MLP_SIZE) {
    // The enqueue pipeline (defined above this chunk) is allocating into this very entry in s1:
    // do not request translation for content that is about to be replaced.
    val l1_evict = s1_l1_alloc && (s1_l1_index === i.U)
    val l2_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === i.U)
    if(i < MLP_L1_SIZE) {
      // A request is raised only while the entry is valid and still flagged is_vaddr.
      tlb_req_arb.io.in(i).valid := l1_valids(i) && l1_array(i).is_vaddr && not_tlbing_vec(i) && !l1_evict
      tlb_req_arb.io.in(i).bits.vaddr := l1_array(i).get_tlb_va()
    }else {
      tlb_req_arb.io.in(i).valid := l2_valids(i - MLP_L1_SIZE) && l2_array(i - MLP_L1_SIZE).is_vaddr && not_tlbing_vec(i) && !l2_evict
      tlb_req_arb.io.in(i).bits.vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
    }
    // Fixed request attributes: a read-type, prefetch-flagged lookup; remaining fields are unused here.
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.isPrefetch := true.B
    tlb_req_arb.io.in(i).bits.size := 3.U
    tlb_req_arb.io.in(i).bits.kill := false.B
    tlb_req_arb.io.in(i).bits.no_translate := false.B
    tlb_req_arb.io.in(i).bits.fullva := 0.U
    tlb_req_arb.io.in(i).bits.checkfullva := false.B
    tlb_req_arb.io.in(i).bits.memidx := DontCare
    tlb_req_arb.io.in(i).bits.debug := DontCare
    tlb_req_arb.io.in(i).bits.hlvx := DontCare
    tlb_req_arb.io.in(i).bits.hyperinst := DontCare
    tlb_req_arb.io.in(i).bits.pmp_addr := DontCare
  }

  assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")

  // s1: send out the req
  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.valid)
  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid)
  // Flat entry index of the arbitration winner, recovered from the one-hot fire vector.
  val s1_tlb_req_index = RegEnable(OHToUInt(s0_tlb_fire_vec.asUInt), tlb_req_arb.io.out.valid)
  val s1_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s1_tlb_req_index)
  val s1_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s1_tlb_req_index)
  val s1_tlb_evict = s1_l1_tlb_evict || s1_l2_tlb_evict
  // The request is not sent if its entry is being re-allocated in this cycle.
  io.tlb_req.req.valid := s1_tlb_req_valid && !s1_tlb_evict
  io.tlb_req.req.bits := s1_tlb_req_bits
  io.tlb_req.req_kill := false.B
  // The arbiter output is always accepted, so out.valid and out.fire coincide.
  tlb_req_arb.io.out.ready := true.B

  XSPerfAccumulate("s1_tlb_req_sent", io.tlb_req.req.valid)
  XSPerfAccumulate("s1_tlb_req_evict", s1_tlb_req_valid && s1_tlb_evict)

  // s2: get response from tlb
  val s2_tlb_resp_valid = io.tlb_req.resp.valid
  val s2_tlb_resp = io.tlb_req.resp.bits
  val s2_tlb_update_index = RegEnable(s1_tlb_req_index, s1_tlb_req_valid)
  // Same allocation check as in s1, now against the index that reached s2.
  val s2_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s2_tlb_update_index)
  val s2_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s2_tlb_update_index)
  val s2_tlb_evict = s2_l1_tlb_evict || s2_l2_tlb_evict

  // s3: get pmp response from PMPChecker
  val s3_tlb_resp_valid = RegNext(s2_tlb_resp_valid)
  val s3_tlb_resp = RegEnable(s2_tlb_resp, s2_tlb_resp_valid)
  val s3_tlb_update_index = RegEnable(s2_tlb_update_index, s2_tlb_resp_valid)
  // NOTE(review): only the eviction observed in s2 is carried here; an allocation into the same entry
  // during the s3 cycle itself is not checked in this chunk -- confirm it cannot happen or is handled elsewhere.
  val s3_tlb_evict = RegNext(s2_tlb_evict)
  val s3_pmp_resp = io.pmp_resp
  // A usable translation: response present, entry not evicted, and the tlb did not miss.
  val s3_update_valid = s3_tlb_resp_valid && !s3_tlb_evict && !s3_tlb_resp.miss
  // The translated entry is dropped when the access would fault or target uncacheable space.
  val s3_drop = s3_update_valid && (
    // page/access fault
    s3_tlb_resp.excp.head.pf.ld || s3_tlb_resp.excp.head.gpf.ld || s3_tlb_resp.excp.head.af.ld ||
    // uncache
    s3_pmp_resp.mmio || Pbmt.isUncache(s3_tlb_resp.pbmt.head) ||
    // pmp access fault
    s3_pmp_resp.ld
  )
  when(s3_tlb_resp_valid && !s3_tlb_evict) {
    when(s3_tlb_update_index < MLP_L1_SIZE.U) {
      // On a miss the entry stays flagged is_vaddr (so it can request again); on a hit the flag is cleared.
      l1_array(s3_tlb_update_index).is_vaddr := s3_tlb_resp.miss

      when(!s3_tlb_resp.miss) {
        // Overwrite the region with the paddr bits above REGION_TAG_OFFSET, zero-padded by (VAddrBits - PAddrBits) bits.
        l1_array(s3_tlb_update_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
        when(s3_drop) {
          // invalid_array is defined above this chunk; presumably it invalidates the l1 entry -- verify.
          invalid_array(s3_tlb_update_index, false)
        }
      }
    }.otherwise {
      // Convert the flat index back into an l2_array index.
      val inner_index = s3_tlb_update_index - MLP_L1_SIZE.U
      l2_array(inner_index).is_vaddr := s3_tlb_resp.miss

      when(!s3_tlb_resp.miss) {
        l2_array(inner_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s3_tlb_resp.paddr.head(s3_tlb_resp.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
        when(s3_drop) {
          // Second argument selects the l2/l3 array (presumably) -- verify against invalid_array's definition.
          invalid_array(inner_index, true)
        }
      }
    }
  }
  io.tlb_req.resp.ready := true.B

  XSPerfAccumulate("s3_tlb_resp_valid", s3_tlb_resp_valid)
  XSPerfAccumulate("s3_tlb_resp_evict", s3_tlb_resp_valid && s3_tlb_evict)
  XSPerfAccumulate("s3_tlb_resp_miss", s3_tlb_resp_valid && !s3_tlb_evict && s3_tlb_resp.miss)
  XSPerfAccumulate("s3_tlb_resp_updated", s3_update_valid)
  XSPerfAccumulate("s3_tlb_resp_page_fault", s3_update_valid && s3_tlb_resp.excp.head.pf.ld)
  XSPerfAccumulate("s3_tlb_resp_guestpage_fault", s3_update_valid && s3_tlb_resp.excp.head.gpf.ld)
  XSPerfAccumulate("s3_tlb_resp_access_fault", s3_update_valid && s3_tlb_resp.excp.head.af.ld)
  XSPerfAccumulate("s3_tlb_resp_pmp_access_fault", s3_update_valid && s3_pmp_resp.ld)
  XSPerfAccumulate("s3_tlb_resp_uncache", s3_update_valid && (Pbmt.isUncache(s3_tlb_resp.pbmt.head) || s3_pmp_resp.mmio))

  // l1 pf
  // s0: generate prefetch req paddr per entry, arb them
  val s0_pf_fire_vec = VecInit((0 until MLP_L1_SIZE).map{case i => l1_pf_req_arb.io.in(i).fire})
  val s1_pf_fire_vec = GatedValidRegNext(s0_pf_fire_vec)

  val s0_pf_fire = l1_pf_req_arb.io.out.fire
  val s0_pf_index = l1_pf_req_arb.io.chosen
  // One-hot position of the chosen block inside its region (get_candidate_oh is defined above this chunk).
  val s0_pf_candidate_oh = get_candidate_oh(l1_pf_req_arb.io.out.bits.req.paddr)

  for(i <- 0 until MLP_L1_SIZE) {
    // An entry being re-allocated this cycle must not compete for the prefetch port.
    val evict = s1_l1_alloc && (s1_l1_index === i.U)
    l1_pf_req_arb.io.in(i).valid := l1_array(i).can_send_pf(l1_valids(i)) && !evict
    l1_pf_req_arb.io.in(i).bits.req.paddr := l1_array(i).get_pf_addr()
    l1_pf_req_arb.io.in(i).bits.req.alias := l1_array(i).alias
    l1_pf_req_arb.io.in(i).bits.req.confidence := io.confidence
    l1_pf_req_arb.io.in(i).bits.req.is_store := false.B
    l1_pf_req_arb.io.in(i).bits.req.pf_source := l1_array(i).source
    l1_pf_req_arb.io.in(i).bits.debug_vaddr := l1_array(i).get_pf_debug_vaddr()
  }

  // Record the chosen block in sent_vec as soon as it wins arbitration.
  when(s0_pf_fire) {
    l1_array(s0_pf_index).sent_vec := l1_array(s0_pf_index).sent_vec | s0_pf_candidate_oh
  }

  assert(PopCount(s0_pf_fire_vec) <= 1.U, "s0_pf_fire_vec should be one-hot or empty")

  // s1: send out to dcache
  // NOTE(review): declared without a reset value, so its content before the first assignment is
  // undefined -- confirm this is intended.
  val s1_pf_valid = Reg(Bool())
  val s1_pf_bits = RegEnable(l1_pf_req_arb.io.out.bits, l1_pf_req_arb.io.out.fire)
  val s1_pf_index = RegEnable(s0_pf_index, l1_pf_req_arb.io.out.fire)
  val s1_pf_candidate_oh = RegEnable(s0_pf_candidate_oh, l1_pf_req_arb.io.out.fire)
  // The enqueue pipeline is allocating / updating the same entry this cycle: hold the request back.
  val s1_pf_evict = s1_l1_alloc && (s1_l1_index === s1_pf_index)
  val s1_pf_update = s1_l1_update && (s1_l1_index === s1_pf_index)
  val s1_pf_can_go = io.l1_req.ready && !s1_pf_evict && !s1_pf_update
  val s1_pf_fire = s1_pf_valid && s1_pf_can_go

  // Order matters (last connect wins): a new request arriving from s0 in the same cycle keeps
  // the stage valid even though the current one leaves.
  when(s1_pf_can_go) {
    s1_pf_valid := false.B
  }

  when(l1_pf_req_arb.io.out.fire) {
    s1_pf_valid := true.B
  }

  // Once the request leaves s1, clear its bit so the block is not requested again.
  when(s1_pf_fire) {
    l1_array(s1_pf_index).bit_vec := l1_array(s1_pf_index).bit_vec & ~s1_pf_candidate_oh
  }

  // Requests are only presented when some PmemRanges entry covers the target paddr; s1_pf_fire
  // above does not depend on in_pmem or io.enable.
  val in_pmem = PmemRanges.map(_.cover(s1_pf_bits.req.paddr)).reduce(_ || _)
  io.l1_req.valid := s1_pf_valid && !s1_pf_evict && !s1_pf_update && in_pmem && io.enable
  io.l1_req.bits := s1_pf_bits.req

  // s0 may advance when s1 is empty or is draining in this cycle.
  l1_pf_req_arb.io.out.ready := s1_pf_can_go || !s1_pf_valid

  assert(!((s1_l1_alloc || s1_l1_update) && s1_pf_fire && (s1_l1_index === s1_pf_index)), "pf pipeline & enq pipeline bit_vec harzard!")

  XSPerfAccumulate("s1_pf_valid", s1_pf_valid)
  XSPerfAccumulate("s1_pf_block_by_pipe_unready", s1_pf_valid && !io.l1_req.ready)
  XSPerfAccumulate("s1_pf_block_by_enq_alloc_harzard", s1_pf_valid && s1_pf_evict)
  XSPerfAccumulate("s1_pf_block_by_enq_update_harzard", s1_pf_valid && s1_pf_update)
  XSPerfAccumulate("s1_pf_fire", s1_pf_fire)

  // l2 pf
  // s0: generate prefetch req paddr per entry, arb them, send out
  io.l2_pf_addr.valid := l2_pf_req_arb.io.out.valid
  io.l2_pf_addr.bits := l2_pf_req_arb.io.out.bits.req

  l2_pf_req_arb.io.out.ready := true.B

  for(i <- 0 until MLP_L2L3_SIZE) {
    val evict = s1_l2_alloc && (s1_l2_index === i.U)
    // l2_array is shared by both outer levels; the entry's sink field selects the L2 arbiter here.
    l2_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L2) && !evict
    l2_pf_req_arb.io.in(i).bits.req.addr := l2_array(i).get_pf_addr()
    // Translate the entry's prefetch source into the request source id; anything else maps to Unknown.
    l2_pf_req_arb.io.in(i).bits.req.source := MuxLookup(l2_array(i).source.value, MemReqSource.Prefetch2L2Unknown.id.U)(Seq(
      L1_HW_PREFETCH_STRIDE -> MemReqSource.Prefetch2L2Stride.id.U,
      L1_HW_PREFETCH_STREAM -> MemReqSource.Prefetch2L2Stream.id.U
    ))
    l2_pf_req_arb.io.in(i).bits.debug_vaddr := l2_array(i).get_pf_debug_vaddr()
  }

  // out.ready is tied high, so a valid output is always consumed: mark the block as sent.
  when(l2_pf_req_arb.io.out.valid) {
    l2_array(l2_pf_req_arb.io.chosen).sent_vec := l2_array(l2_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l2_pf_req_arb.io.out.bits.req.addr)
  }

  // Debug trace of issued stream prefetches; the table name is suffixed with the hart id.
  val stream_out_debug_table = ChiselDB.createTable("StreamPFTraceOut" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceOutEntry, basicDB = false)
  val l1_debug_data = Wire(new StreamPFTraceOutEntry)
  val l2_debug_data = Wire(new StreamPFTraceOutEntry)
  l1_debug_data.PFVaddr := l1_pf_req_arb.io.out.bits.debug_vaddr
  l1_debug_data.PFSink := SINK_L1
  l2_debug_data.PFVaddr := l2_pf_req_arb.io.out.bits.debug_vaddr
  l2_debug_data.PFSink := SINK_L2

  stream_out_debug_table.log(
    data = l1_debug_data,
    en = l1_pf_req_arb.io.out.fire && (l1_pf_req_arb.io.out.bits.req.pf_source.value === L1_HW_PREFETCH_STREAM),
    site = "StreamPFTraceOut",
    clock = clock,
    reset = reset
  )
  stream_out_debug_table.log(
    data = l2_debug_data,
    en = l2_pf_req_arb.io.out.fire && (l2_pf_req_arb.io.out.bits.req.source === MemReqSource.Prefetch2L2Stream.id.U),
    site = "StreamPFTraceOut",
    clock = clock,
    reset = reset
  )

  // last level cache pf
  // s0: generate prefetch req paddr per entry, arb them, send out
  io.l3_pf_addr.valid := l3_pf_req_arb.io.out.valid
  io.l3_pf_addr.bits := l3_pf_req_arb.io.out.bits

  l3_pf_req_arb.io.out.ready := true.B

  for(i <- 0 until MLP_L2L3_SIZE) {
    val evict = s1_l2_alloc && (s1_l2_index === i.U)
    // Same entries as the l2 path, but only those whose sink is L3.
    l3_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L3) && !evict
    l3_pf_req_arb.io.in(i).bits := l2_array(i).get_pf_addr()
  }

  when(l3_pf_req_arb.io.out.valid) {
    l2_array(l3_pf_req_arb.io.chosen).sent_vec := l2_array(l3_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l3_pf_req_arb.io.out.bits)
  }

  // reset meta to avoid multi-hit problem
  // The flush takes effect one cycle after io.flush is asserted (RegNext); reset_array is defined
  // above this chunk and is called once per entry of both arrays.
  for(i <- 0 until MLP_SIZE) {
    if(i < MLP_L1_SIZE) {
      when(RegNext(io.flush)) {
        reset_array(i, false)
      }
    }else {
      when(RegNext(io.flush)) {
        reset_array(i - MLP_L1_SIZE, true)
      }
    }
  }

  XSPerfAccumulate("l2_prefetche_queue_busby", io.l2PfqBusy)
  // Histograms of how many entries satisfy can_send_pf each cycle: overall, then per target level.
  XSPerfHistogram("filter_active", PopCount(VecInit(
    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
    l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) }
    ).asUInt), true.B, 0, MLP_SIZE, 1)
  XSPerfHistogram("l1_filter_active", PopCount(VecInit(l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v)}).asUInt), true.B, 0, MLP_L1_SIZE, 1)
// Final statements of the preceding module body (its header lies above this chunk); kept unchanged.
  XSPerfHistogram("l2_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L2)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
  XSPerfHistogram("l3_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L3)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
}

/**
  * Top level of the L1 prefetcher.
  *
  * Wires together two training filters (stream and stride), the stream bit-vector array,
  * the stride meta array and the multi-level prefetch filter, then exposes the filter's
  * L1 / L2 / L3 request outputs on the module IO.
  *
  * Gating visible in this class:
  *  - training inputs are qualified by `io.enable`;
  *  - every outgoing request is qualified by `io.enable && pf_ctrl.enable`;
  *  - L2 and L3 requests additionally require the address to be covered by `PmemRanges`.
  */
class L1Prefetcher(implicit p: Parameters) extends BasePrefecher with HasStreamPrefetchHelper with HasStridePrefetchHelper {
  // ---- extra ports -------------------------------------------------------------------------
  val pf_ctrl = IO(Input(new PrefetchControlBundle))
  val stride_train = IO(Flipped(Vec(backendParams.LduCnt + backendParams.HyuCnt, ValidIO(new LsPrefetchTrainBundle()))))
  val l2PfqBusy = IO(Input(Bool()))

  // ---- sub-modules -------------------------------------------------------------------------
  val stride_train_filter = Module(new TrainFilter(STRIDE_FILTER_SIZE, "stride"))
  val stride_meta_array = Module(new StrideMetaArray)
  val stream_train_filter = Module(new TrainFilter(STREAM_FILTER_SIZE, "stream"))
  val stream_bit_vec_array = Module(new StreamBitVectorArray)
  val pf_queue_filter = Module(new MutiLevelPrefetchFilter)

  // With pf_ctrl.enable low the queue filter keeps running (its l1_req.ready is forced high
  // below), but no request is presented on the outputs.
  val enable = io.enable
  val flush = pf_ctrl.flush

  // ---- training inputs ---------------------------------------------------------------------
  // Stream training taps the common load ports, stride training its dedicated ports.
  for (idx <- 0 until stream_train_filter.io.ld_in.length) {
    val port = stream_train_filter.io.ld_in(idx)
    port.bits := io.ld_in(idx).bits
    port.valid := io.ld_in(idx).valid && enable
  }
  for (idx <- 0 until stride_train_filter.io.ld_in.length) {
    val port = stride_train_filter.io.ld_in(idx)
    port.bits := stride_train(idx).bits
    port.valid := stride_train(idx).valid && enable
  }

  // Both training filters share the same control signals.
  Seq(stream_train_filter, stride_train_filter).foreach { filter =>
    filter.io.enable := enable
    filter.io.flush := flush
  }

  // ---- pattern detectors -------------------------------------------------------------------
  stream_bit_vec_array.io.train_req <> stream_train_filter.io.train_req
  stream_bit_vec_array.io.dynamic_depth := pf_ctrl.dynamic_depth
  stream_bit_vec_array.io.flush := flush
  stream_bit_vec_array.io.enable := enable

  stride_meta_array.io.train_req <> stride_train_filter.io.train_req
  stride_meta_array.io.stream_lookup_req <> stream_bit_vec_array.io.stream_lookup_req
  stride_meta_array.io.stream_lookup_resp <> stream_bit_vec_array.io.stream_lookup_resp
  // The stride side does not use the dynamic depth from pf_ctrl.
  stride_meta_array.io.dynamic_depth := 0.U
  stride_meta_array.io.flush := flush
  stride_meta_array.io.enable := enable

  // ---- merge detector outputs into the queue filter ----------------------------------------
  // A stream request wins whenever it is valid; the stride request is forwarded otherwise.
  val streamL1 = stream_bit_vec_array.io.l1_prefetch_req
  val strideL1 = stride_meta_array.io.l1_prefetch_req
  pf_queue_filter.io.l1_prefetch_req.valid := streamL1.valid || strideL1.valid
  pf_queue_filter.io.l1_prefetch_req.bits := Mux(streamL1.valid, streamL1.bits, strideL1.bits)

  val streamL2L3 = stream_bit_vec_array.io.l2_l3_prefetch_req
  val strideL2L3 = stride_meta_array.io.l2_l3_prefetch_req
  pf_queue_filter.io.l2_l3_prefetch_req.valid := streamL2L3.valid || strideL2L3.valid
  pf_queue_filter.io.l2_l3_prefetch_req.bits := Mux(streamL2L3.valid, streamL2L3.bits, strideL2L3.bits)

  // ---- queue filter side-band --------------------------------------------------------------
  pf_queue_filter.io.tlb_req <> io.tlb_req
  pf_queue_filter.io.pmp_resp := io.pmp_resp
  pf_queue_filter.io.confidence := pf_ctrl.confidence
  pf_queue_filter.io.l2PfqBusy := l2PfqBusy
  pf_queue_filter.io.flush := flush
  pf_queue_filter.io.enable := enable

  // ---- L1 request --------------------------------------------------------------------------
  io.l1_req.bits := pf_queue_filter.io.l1_req.bits
  io.l1_req.valid := pf_queue_filter.io.l1_req.valid && enable && pf_ctrl.enable
  // When prefetching is switched off via pf_ctrl, always accept so the filter keeps draining.
  pf_queue_filter.io.l1_req.ready := Mux(pf_ctrl.enable, io.l1_req.ready, true.B)

  // ---- L2 request --------------------------------------------------------------------------
  val l2_in_pmem = PmemRanges.map(range => range.cover(pf_queue_filter.io.l2_pf_addr.bits.addr)).reduce(_ || _)
  io.l2_req.bits := pf_queue_filter.io.l2_pf_addr.bits
  io.l2_req.valid := pf_queue_filter.io.l2_pf_addr.valid && l2_in_pmem && enable && pf_ctrl.enable

  // ---- L3 request --------------------------------------------------------------------------
  val l3_in_pmem = PmemRanges.map(range => range.cover(pf_queue_filter.io.l3_pf_addr.bits)).reduce(_ || _)
  io.l3_req.bits := pf_queue_filter.io.l3_pf_addr.bits
  io.l3_req.valid := pf_queue_filter.io.l3_pf_addr.valid && l3_in_pmem && enable && pf_ctrl.enable
}