package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._
import xiangshan.mem.{L1PrefetchReq, LdPrefetchTrainBundle}
import xiangshan.mem.trace._
import xiangshan.mem.L1PrefetchSource

trait HasStreamPrefetchHelper extends HasL1PrefetchHelper {
  // capacity related
  val STREAM_FILTER_SIZE = 4
  val BIT_VEC_ARRAY_SIZE = 16
  val ACTIVE_THRESHOLD = BIT_VEC_WITDH - 4
  val INIT_DEC_MODE = false

  // bit_vector [StreamBitVectorBundle]:
  // `X`: valid; `.`: invalid; `H`: hit
  // [X X X X X X X X X . . H . X X X] [. . X X X X . . . . . . . . . .]
  //          hit in 12th slot & active ---------------------> prefetch bit_vector [StreamPrefetchReqBundle]
  //                       | <---------------------------- depth ---------------------------->
  //                       | <-- width -->
  val DEPTH_BYTES = 1024
  val DEPTH_CACHE_BLOCKS = DEPTH_BYTES / dcacheParameters.blockBytes
  val WIDTH_BYTES = 128
  val WIDTH_CACHE_BLOCKS = WIDTH_BYTES / dcacheParameters.blockBytes

  val L2_DEPTH_RATIO = 2
  val L2_WIDTH_BYTES = WIDTH_BYTES * 2
  val L2_WIDTH_CACHE_BLOCKS = L2_WIDTH_BYTES / dcacheParameters.blockBytes

  val L3_DEPTH_RATIO = 3
  val L3_WIDTH_BYTES = WIDTH_BYTES * 2 * 2
  val L3_WIDTH_CACHE_BLOCKS = L3_WIDTH_BYTES / dcacheParameters.blockBytes

  val DEPTH_LOOKAHEAD = 6
  val DEPTH_BITS = log2Up(DEPTH_CACHE_BLOCKS) + DEPTH_LOOKAHEAD

  val ENABLE_DECR_MODE = false
  val ENABLE_STRICT_ACTIVE_DETECTION = true

  // constraints
  require((DEPTH_BYTES >= REGION_SIZE) && ((DEPTH_BYTES % REGION_SIZE) == 0) && ((DEPTH_BYTES / REGION_SIZE) > 0))
  require(((VADDR_HASH_WIDTH * 3) + BLK_ADDR_RAW_WIDTH) <= REGION_TAG_BITS)
  require(WIDTH_BYTES >= dcacheParameters.blockBytes)
}
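// Worked example of the derived geometry (a sketch only; it assumes the common
// 64-byte dcache block and a 16-slot bit vector, both of which actually come
// from dcacheParameters / HasL1PrefetchHelper):
//   DEPTH_CACHE_BLOCKS    = 1024 / 64 = 16 blocks of base lookahead distance
//   WIDTH_CACHE_BLOCKS    =  128 / 64 =  2 blocks per L1 prefetch request
//   L2_WIDTH_CACHE_BLOCKS =  256 / 64 =  4 blocks per L2 prefetch request
//   L3_WIDTH_CACHE_BLOCKS =  512 / 64 =  8 blocks per L3 prefetch request
//   DEPTH_BITS            = log2Up(16) + 6 = 10, so io.dynamic_depth has
//                           headroom to grow to 64x the static 16-block depth
//                           before the L2/L3 ratio shifts are applied.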
class StreamBitVectorBundle(implicit p: Parameters) extends XSBundle with HasStreamPrefetchHelper {
  val tag = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  val active = Bool()
  // cnt can be optimized
  val cnt = UInt((log2Up(BIT_VEC_WITDH) + 1).W)
  val decr_mode = Bool()

  // debug usage
  val trigger_full_va = UInt(VAddrBits.W)

  def reset(index: Int) = {
    tag := index.U
    bit_vec := 0.U
    active := false.B
    cnt := 0.U
    decr_mode := INIT_DEC_MODE.B
    trigger_full_va := 0xdeadbeefL.U
  }

  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
    valid1 && valid2 && region_hash_tag(tag) === region_hash_tag(new_tag)
  }

  def alloc(alloc_tag: UInt, alloc_bit_vec: UInt, alloc_active: Bool, alloc_decr_mode: Bool, alloc_full_vaddr: UInt) = {
    tag := alloc_tag
    bit_vec := alloc_bit_vec
    active := alloc_active
    cnt := 1.U
    trigger_full_va := alloc_full_vaddr
    if (ENABLE_DECR_MODE) {
      decr_mode := alloc_decr_mode
    } else {
      decr_mode := INIT_DEC_MODE.B
    }

    assert(PopCount(alloc_bit_vec) === 1.U, "alloc vector should be one hot")
  }

  def update(update_bit_vec: UInt, update_active: Bool) = {
    // if the slot was 0 before, increment cnt
    val cnt_en = !((bit_vec & update_bit_vec).orR)
    val cnt_next = Mux(cnt_en, cnt + 1.U, cnt)

    bit_vec := bit_vec | update_bit_vec
    cnt := cnt_next
    when(cnt_next >= ACTIVE_THRESHOLD.U) {
      active := true.B
    }
    when(update_active) {
      active := true.B
    }

    assert(PopCount(update_bit_vec) === 1.U, "update vector should be one hot")
    assert(cnt <= BIT_VEC_WITDH.U, "cnt should always be less than or equal to the bit vector size")
  }
}
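// Activation sketch (illustrative, assuming BIT_VEC_WITDH == 16 and hence
// ACTIVE_THRESHOLD == 12): an entry becomes active in one of two ways.
//   - Locally: once 12 distinct blocks of its region have been trained,
//     cnt_next reaches 12 and the entry promotes itself to an active stream.
//   - Transitively: update_active (or alloc_active) is asserted because an
//     adjacent region already holds an active stream, so a sequential walk
//     that just crossed a region boundary is active from its first access
//     instead of re-earning 12 hits.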
class StreamPrefetchReqBundle(implicit p: Parameters) extends XSBundle with HasStreamPrefetchHelper {
  val region = UInt(REGION_TAG_BITS.W)
  val bit_vec = UInt(BIT_VEC_WITDH.W)
  val sink = UInt(SINK_BITS.W)
  val source = new L1PrefetchSource()
  // debug usage
  val trigger_pc = UInt(VAddrBits.W)
  val trigger_va = UInt(VAddrBits.W)

  // align prefetch vaddr and width to region
  def getStreamPrefetchReqBundle(valid: Bool, vaddr: UInt, width: Int, decr_mode: Bool, sink: UInt, source: UInt, t_pc: UInt, t_va: UInt): StreamPrefetchReqBundle = {
    val res = Wire(new StreamPrefetchReqBundle)
    res.region := get_region_tag(vaddr)
    res.sink := sink
    res.source.value := source

    res.trigger_pc := t_pc
    res.trigger_va := t_va

    val region_bits = get_region_bits(vaddr)
    val region_bit_vec = UIntToOH(region_bits)
    res.bit_vec := Mux(
      decr_mode,
      (0 until width).map { i => region_bit_vec >> i }.reduce(_ | _),
      (0 until width).map { i => region_bit_vec << i }.reduce(_ | _)
    )

    assert(!valid || PopCount(res.bit_vec) <= width.U, "the actual number of prefetched blocks should be less than or equal to the requested width")
    assert(!valid || PopCount(res.bit_vec) >= 1.U, "at least one block should be included")
    assert(sink <= SINK_L3, "invalid sink")
    for (i <- 0 until BIT_VEC_WITDH) {
      when(decr_mode) {
        when(i.U > region_bits) {
          assert(!valid || res.bit_vec(i) === 0.U, s"res.bit_vec(${i}) is not zero in decr_mode, prefetch vector is wrong!")
        }.elsewhen(i.U === region_bits) {
          assert(!valid || res.bit_vec(i) === 1.U, s"res.bit_vec(${i}) is zero in decr_mode, prefetch vector is wrong!")
        }
      }.otherwise {
        when(i.U < region_bits) {
          assert(!valid || res.bit_vec(i) === 0.U, s"res.bit_vec(${i}) is not zero in incr_mode, prefetch vector is wrong!")
        }.elsewhen(i.U === region_bits) {
          assert(!valid || res.bit_vec(i) === 1.U, s"res.bit_vec(${i}) is zero in incr_mode, prefetch vector is wrong!")
        }
      }
    }

    res
  }
}
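// Construction sketch: with region_bits == 5 and width == 2 (illustrative
// values), region_bit_vec == 0b0000_0000_0010_0000, so
//   incr_mode: bit_vec = (OH << 0) | (OH << 1) = 0b0000_0000_0110_0000
//     -> prefetch blocks 5 and 6, ascending from the trigger block;
//   decr_mode: bit_vec = (OH >> 0) | (OH >> 1) = 0b0000_0000_0011_0000
//     -> prefetch blocks 5 and 4, descending from the trigger block.
// Bits shifted past either end of the region are dropped by the truncating
// assignment to bit_vec, which is why PopCount(res.bit_vec) may be smaller
// than width near a region boundary (hence the <= width assertion).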
class StreamBitVectorArray(implicit p: Parameters) extends XSModule with HasStreamPrefetchHelper {
  val io = IO(new XSBundle {
    val enable = Input(Bool())
    // TODO: flush all entries when a process change happens, or disable stream prefetch for a while
    val flush = Input(Bool())
    val dynamic_depth = Input(UInt(DEPTH_BITS.W))
    val train_req = Flipped(DecoupledIO(new PrefetchReqBundle))
    val l1_prefetch_req = ValidIO(new StreamPrefetchReqBundle)
    val l2_l3_prefetch_req = ValidIO(new StreamPrefetchReqBundle)

    // Stride sends its lookup req here
    val stream_lookup_req = Flipped(ValidIO(new PrefetchReqBundle))
    val stream_lookup_resp = Output(Bool())
  })

  val array = Reg(Vec(BIT_VEC_ARRAY_SIZE, new StreamBitVectorBundle))
  val valids = RegInit(VecInit(Seq.fill(BIT_VEC_ARRAY_SIZE)(false.B)))

  def reset_array(i: Int): Unit = {
    valids(i) := false.B
    // only the control signal (valid) needs resetting; skipping the data fields is friendlier for area
    // array(i).reset(i)
  }

  val replacement = ReplacementPolicy.fromString("plru", BIT_VEC_ARRAY_SIZE)

  // s0: generate region tag, parallel match
  val s0_can_accept = Wire(Bool())
  val s0_valid = io.train_req.fire
  val s0_pc = io.train_req.bits.pc
  val s0_vaddr = io.train_req.bits.vaddr
  val s0_region_bits = get_region_bits(s0_vaddr)
  val s0_region_tag = get_region_tag(s0_vaddr)
  val s0_region_tag_plus_one = get_region_tag(s0_vaddr) + 1.U
  val s0_region_tag_minus_one = get_region_tag(s0_vaddr) - 1.U
  val s0_region_tag_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s0_valid, s0_region_tag) }
  val s0_region_tag_plus_one_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s0_valid, s0_region_tag_plus_one) }
  val s0_region_tag_minus_one_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s0_valid, s0_region_tag_minus_one) }
  val s0_hit = Cat(s0_region_tag_match_vec).orR
  val s0_plus_one_hit = Cat(s0_region_tag_plus_one_match_vec).orR
  val s0_minus_one_hit = Cat(s0_region_tag_minus_one_match_vec).orR
  val s0_hit_vec = VecInit(s0_region_tag_match_vec).asUInt
  val s0_index = Mux(s0_hit, OHToUInt(s0_hit_vec), replacement.way)
  val s0_plus_one_index = OHToUInt(VecInit(s0_region_tag_plus_one_match_vec).asUInt)
  val s0_minus_one_index = OHToUInt(VecInit(s0_region_tag_minus_one_match_vec).asUInt)
  io.train_req.ready := s0_can_accept

  when(s0_valid) {
    replacement.access(s0_index)
  }

  val stream_pf_train_debug_table = ChiselDB.createTable("StreamTrainTraceTable" + p(XSCoreParamsKey).HartId.toString, new StreamTrainTraceEntry, basicDB = false)

  val spf_log_enable = s0_valid
  val spf_log_data = Wire(new StreamTrainTraceEntry)

  // WARNING: Type here only records that training was triggered by the stream prefetcher; it does not name the sink
  spf_log_data.Type := MemReqSource.Prefetch2L2Stream.id.U
  spf_log_data.OldAddr := Mux(
    !s0_hit,
    s0_vaddr,
    array(s0_index).trigger_full_va
  )
  spf_log_data.CurAddr := s0_vaddr
  spf_log_data.Offset := DontCare
  spf_log_data.Score := DontCare
  spf_log_data.Miss := io.train_req.bits.miss

  stream_pf_train_debug_table.log(
    data = spf_log_data,
    en = spf_log_enable,
    site = "StreamTrainTraceTable",
    clock = clock,
    reset = reset
  )

  assert(!s0_valid || PopCount(VecInit(s0_region_tag_match_vec)) <= 1.U, "req region should match no more than 1 entry")
  assert(!s0_valid || PopCount(VecInit(s0_region_tag_plus_one_match_vec)) <= 1.U, "req region plus 1 should match no more than 1 entry")
  assert(!s0_valid || PopCount(VecInit(s0_region_tag_minus_one_match_vec)) <= 1.U, "req region minus 1 should match no more than 1 entry")
  assert(!s0_valid || !(s0_hit && s0_plus_one_hit && (s0_index === s0_plus_one_index)), "region and region plus 1 index match failed")
  assert(!s0_valid || !(s0_hit && s0_minus_one_hit && (s0_index === s0_minus_one_index)), "region and region minus 1 index match failed")
  assert(!s0_valid || !(s0_plus_one_hit && s0_minus_one_hit && (s0_minus_one_index === s0_plus_one_index)), "region plus 1 and region minus 1 index match failed")
  assert(!(s0_valid && RegNext(s0_valid) && !s0_hit && !RegEnable(s0_hit, s0_valid) && replacement.way === RegEnable(replacement.way, s0_valid)), "replacement error")

  XSPerfAccumulate("s0_valid_train_req", s0_valid)
  val s0_hit_pattern_vec = Seq(s0_hit, s0_plus_one_hit, s0_minus_one_hit)
  for (i <- 0 until (1 << s0_hit_pattern_vec.size)) {
    XSPerfAccumulate(s"s0_hit_pattern_${toBinary(i)}", (VecInit(s0_hit_pattern_vec).asUInt === i.U) && s0_valid)
  }
  XSPerfAccumulate("s0_replace_the_neighbor", s0_valid && !s0_hit && ((s0_plus_one_hit && (s0_index === s0_plus_one_index)) || (s0_minus_one_hit && (s0_index === s0_minus_one_index))))
  XSPerfAccumulate("s0_req_valid", io.train_req.valid)
  XSPerfAccumulate("s0_req_cannot_accept", io.train_req.valid && !io.train_req.ready)

  val ratio_const = Constantin.createRecord(s"l2DepthRatio${p(XSCoreParamsKey).HartId}", initValue = L2_DEPTH_RATIO)
  val ratio = ratio_const(3, 0)

  val l3_ratio_const = Constantin.createRecord(s"l3DepthRatio${p(XSCoreParamsKey).HartId}", initValue = L3_DEPTH_RATIO)
  val l3_ratio = l3_ratio_const(3, 0)

  // s1: alloc or update
  val s1_valid = GatedValidRegNext(s0_valid)
  val s1_index = RegEnable(s0_index, s0_valid)
  val s1_pc = RegEnable(s0_pc, s0_valid)
  val s1_vaddr = RegEnable(s0_vaddr, s0_valid)
  val s1_plus_one_index = RegEnable(s0_plus_one_index, s0_valid)
  val s1_minus_one_index = RegEnable(s0_minus_one_index, s0_valid)
  val s1_hit = RegEnable(s0_hit, s0_valid)
  val s1_plus_one_hit = if (ENABLE_STRICT_ACTIVE_DETECTION)
    RegEnable(s0_plus_one_hit, s0_valid) && array(s1_plus_one_index).active && (array(s1_plus_one_index).cnt >= ACTIVE_THRESHOLD.U)
  else
    RegEnable(s0_plus_one_hit, s0_valid) && array(s1_plus_one_index).active
  val s1_minus_one_hit = if (ENABLE_STRICT_ACTIVE_DETECTION)
    RegEnable(s0_minus_one_hit, s0_valid) && array(s1_minus_one_index).active && (array(s1_minus_one_index).cnt >= ACTIVE_THRESHOLD.U)
  else
    RegEnable(s0_minus_one_hit, s0_valid) && array(s1_minus_one_index).active
  val s1_region_tag = RegEnable(s0_region_tag, s0_valid)
  val s1_region_bits = RegEnable(s0_region_bits, s0_valid)
  val s1_alloc = s1_valid && !s1_hit
  val s1_update = s1_valid && s1_hit
  val s1_pf_l1_incr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) + io.dynamic_depth, 0.U(BLOCK_OFFSET.W))
  val s1_pf_l1_decr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) - io.dynamic_depth, 0.U(BLOCK_OFFSET.W))
  val s1_pf_l2_incr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) + (io.dynamic_depth << ratio), 0.U(BLOCK_OFFSET.W))
  val s1_pf_l2_decr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) - (io.dynamic_depth << ratio), 0.U(BLOCK_OFFSET.W))
  val s1_pf_l3_incr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) + (io.dynamic_depth << l3_ratio), 0.U(BLOCK_OFFSET.W))
  val s1_pf_l3_decr_vaddr = Cat(region_to_block_addr(s1_region_tag, s1_region_bits) - (io.dynamic_depth << l3_ratio), 0.U(BLOCK_OFFSET.W))
  // TODO: remove this
  val s1_can_send_pf = Mux(s1_update, !((array(s1_index).bit_vec & UIntToOH(s1_region_bits)).orR), true.B)
  s0_can_accept := !(s1_valid && (region_hash_tag(s1_region_tag) === region_hash_tag(s0_region_tag)))

  when(s1_alloc) {
    // alloc a new entry
    valids(s1_index) := true.B
    array(s1_index).alloc(
      alloc_tag = s1_region_tag,
      alloc_bit_vec = UIntToOH(s1_region_bits),
      alloc_active = s1_plus_one_hit || s1_minus_one_hit,
      alloc_decr_mode = RegEnable(s0_plus_one_hit, s0_valid),
      alloc_full_vaddr = RegEnable(s0_vaddr, s0_valid)
    )
  }.elsewhen(s1_update) {
    // update an existing entry
    assert(array(s1_index).cnt =/= 0.U || valids(s1_index), "entry should have been allocated before")
    array(s1_index).update(
      update_bit_vec = UIntToOH(s1_region_bits),
      update_active = s1_plus_one_hit || s1_minus_one_hit)
  }

  XSPerfAccumulate("s1_alloc", s1_alloc)
  XSPerfAccumulate("s1_update", s1_update)
  XSPerfAccumulate("s1_active_plus_one_hit", s1_valid && s1_plus_one_hit)
  XSPerfAccumulate("s1_active_minus_one_hit", s1_valid && s1_minus_one_hit)
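  // Neighbor sketch (an illustrative reading of the s1 logic above): on an
  // allocation miss for region N, s1 consults the entries of regions N+1 and
  // N-1.
  //   - alloc_decr_mode is taken from the raw N+1 tag hit: if the next-higher
  //     region was trained first, the pattern is presumably descending (only
  //     honored when ENABLE_DECR_MODE is set);
  //   - alloc_active is set when either neighbor hit qualifies as active (and,
  //     under ENABLE_STRICT_ACTIVE_DETECTION, its cnt has also reached
  //     ACTIVE_THRESHOLD), so a stream crossing a region boundary keeps
  //     prefetching without re-accumulating hits in the new region.
  // The s1 target addresses then sit io.dynamic_depth blocks ahead of (or
  // behind, in decr_mode) the trigger block, stretched to depth << ratio for
  // L2 and depth << l3_ratio for L3. E.g. with dynamic_depth == 16 and the
  // default ratios of 2 and 3, the block offsets are +16 (L1), +64 (L2) and
  // +128 (L3).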
  // s2: trigger prefetch if hitting an active bit vector; compute the meta of the prefetch req
  val s2_valid = GatedValidRegNext(s1_valid)
  val s2_index = RegEnable(s1_index, s1_valid)
  val s2_pc = RegEnable(s1_pc, s1_valid)
  val s2_vaddr = RegEnable(s1_vaddr, s1_valid)
  val s2_region_bits = RegEnable(s1_region_bits, s1_valid)
  val s2_region_tag = RegEnable(s1_region_tag, s1_valid)
  val s2_pf_l1_incr_vaddr = RegEnable(s1_pf_l1_incr_vaddr, s1_valid)
  val s2_pf_l1_decr_vaddr = RegEnable(s1_pf_l1_decr_vaddr, s1_valid)
  val s2_pf_l2_incr_vaddr = RegEnable(s1_pf_l2_incr_vaddr, s1_valid)
  val s2_pf_l2_decr_vaddr = RegEnable(s1_pf_l2_decr_vaddr, s1_valid)
  val s2_pf_l3_incr_vaddr = RegEnable(s1_pf_l3_incr_vaddr, s1_valid)
  val s2_pf_l3_decr_vaddr = RegEnable(s1_pf_l3_decr_vaddr, s1_valid)
  val s2_can_send_pf = RegEnable(s1_can_send_pf, s1_valid)
  val s2_active = array(s2_index).active
  val s2_decr_mode = array(s2_index).decr_mode
  val s2_l1_vaddr = Mux(s2_decr_mode, s2_pf_l1_decr_vaddr, s2_pf_l1_incr_vaddr)
  val s2_l2_vaddr = Mux(s2_decr_mode, s2_pf_l2_decr_vaddr, s2_pf_l2_incr_vaddr)
  val s2_l3_vaddr = Mux(s2_decr_mode, s2_pf_l3_decr_vaddr, s2_pf_l3_incr_vaddr)
  val s2_will_send_pf = s2_valid && s2_active && s2_can_send_pf
  val s2_pf_req_valid = s2_will_send_pf && io.enable
  val s2_pf_l1_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    valid = s2_valid,
    vaddr = s2_l1_vaddr,
    width = WIDTH_CACHE_BLOCKS,
    decr_mode = s2_decr_mode,
    sink = SINK_L1,
    source = L1_HW_PREFETCH_STREAM,
    t_pc = s2_pc,
    t_va = s2_vaddr
  )
  val s2_pf_l2_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    valid = s2_valid,
    vaddr = s2_l2_vaddr,
    width = L2_WIDTH_CACHE_BLOCKS,
    decr_mode = s2_decr_mode,
    sink = SINK_L2,
    source = L1_HW_PREFETCH_STREAM,
    t_pc = s2_pc,
    t_va = s2_vaddr
  )
  val s2_pf_l3_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    valid = s2_valid,
    vaddr = s2_l3_vaddr,
    width = L3_WIDTH_CACHE_BLOCKS,
    decr_mode = s2_decr_mode,
    sink = SINK_L3,
    source = L1_HW_PREFETCH_STREAM,
    t_pc = s2_pc,
    t_va = s2_vaddr
  )

  XSPerfAccumulate("s2_valid", s2_valid)
  XSPerfAccumulate("s2_will_not_send_pf", s2_valid && !s2_will_send_pf)
  XSPerfAccumulate("s2_will_send_decr_pf", s2_valid && s2_will_send_pf && s2_decr_mode)
  XSPerfAccumulate("s2_will_send_incr_pf", s2_valid && s2_will_send_pf && !s2_decr_mode)

  // s3: send the l1 prefetch req out
  val s3_pf_l1_valid = GatedValidRegNext(s2_pf_req_valid)
  val s3_pf_l1_bits = RegEnable(s2_pf_l1_req_bits, s2_pf_req_valid)
  val s3_pf_l2_valid = GatedValidRegNext(s2_pf_req_valid)
  val s3_pf_l2_bits = RegEnable(s2_pf_l2_req_bits, s2_pf_req_valid)
  val s3_pf_l3_bits = RegEnable(s2_pf_l3_req_bits, s2_pf_req_valid)

  XSPerfAccumulate("s3_pf_sent", s3_pf_l1_valid)

  // s4: send the l2 prefetch req out
  val s4_pf_l2_valid = GatedValidRegNext(s3_pf_l2_valid)
  val s4_pf_l2_bits = RegEnable(s3_pf_l2_bits, s3_pf_l2_valid)
  val s4_pf_l3_bits = RegEnable(s3_pf_l3_bits, s3_pf_l2_valid)

  val enable_l3_pf = Constantin.createRecord(s"enableL3StreamPrefetch${p(XSCoreParamsKey).HartId}", initValue = false)
  // s5: send the l3 prefetch req out
  val s5_pf_l3_valid = GatedValidRegNext(s4_pf_l2_valid) && enable_l3_pf
  val s5_pf_l3_bits = RegEnable(s4_pf_l3_bits, s4_pf_l2_valid)

  io.l1_prefetch_req.valid := s3_pf_l1_valid
  io.l1_prefetch_req.bits := s3_pf_l1_bits
  io.l2_l3_prefetch_req.valid := s4_pf_l2_valid || s5_pf_l3_valid
  io.l2_l3_prefetch_req.bits := Mux(s4_pf_l2_valid, s4_pf_l2_bits, s5_pf_l3_bits)

  XSPerfAccumulate("s4_pf_sent", s4_pf_l2_valid)
  XSPerfAccumulate("s5_pf_sent", !s4_pf_l2_valid && s5_pf_l3_valid)
  XSPerfAccumulate("pf_sent", PopCount(Seq(io.l1_prefetch_req.valid, io.l2_l3_prefetch_req.valid)))
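  // Send-pipeline sketch (illustrative): all three request payloads are built
  // together in s2, then drain one per cycle: s3 sends the L1 request, s4 the
  // L2 request, s5 the optional L3 request (gated by the
  // enableL3StreamPrefetch Constantin knob). l2_l3_prefetch_req is shared
  // between s4 and s5, with s4 winning the Mux; so when triggers fire on
  // back-to-back cycles, the older L3 request loses the port to the newer L2
  // request and is dropped. The "s5_pf_sent" counter accordingly counts only
  // the L3 requests that actually won the port.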
  // Stride lookup starts here
  // S0: Stride sends its lookup req
  val s0_lookup_valid = io.stream_lookup_req.valid
  val s0_lookup_vaddr = io.stream_lookup_req.bits.vaddr
  val s0_lookup_tag = get_region_tag(s0_lookup_vaddr)
  // S1: match
  val s1_lookup_valid = GatedValidRegNext(s0_lookup_valid)
  val s1_lookup_tag = RegEnable(s0_lookup_tag, s0_lookup_valid)
  val s1_lookup_tag_match_vec = array zip valids map { case (e, v) => e.tag_match(v, s1_lookup_valid, s1_lookup_tag) }
  val s1_lookup_hit = VecInit(s1_lookup_tag_match_vec).asUInt.orR
  val s1_lookup_index = OHToUInt(VecInit(s1_lookup_tag_match_vec))
  // S2: read the active bit out
  val s2_lookup_valid = GatedValidRegNext(s1_lookup_valid)
  val s2_lookup_hit = RegEnable(s1_lookup_hit, s1_lookup_valid)
  val s2_lookup_index = RegEnable(s1_lookup_index, s1_lookup_valid)
  val s2_lookup_active = array(s2_lookup_index).active
  // S3: send the response back to Stride
  val s3_lookup_valid = GatedValidRegNext(s2_lookup_valid)
  val s3_lookup_hit = RegEnable(s2_lookup_hit, s2_lookup_valid)
  val s3_lookup_active = RegEnable(s2_lookup_active, s2_lookup_valid)
  io.stream_lookup_resp := s3_lookup_valid && s3_lookup_hit && s3_lookup_active

  // reset the meta to avoid the multi-hit problem
  for (i <- 0 until BIT_VEC_ARRAY_SIZE) {
    when(GatedValidRegNext(io.flush)) {
      reset_array(i)
    }
  }

  XSPerfHistogram("bit_vector_active", PopCount(VecInit(array.map(_.active)).asUInt), true.B, 0, BIT_VEC_ARRAY_SIZE, 1)
  XSPerfHistogram("bit_vector_decr_mode", PopCount(VecInit(array.map(_.decr_mode)).asUInt), true.B, 0, BIT_VEC_ARRAY_SIZE, 1)
  XSPerfAccumulate("hash_conflict", s0_valid && s2_valid && (s0_region_tag =/= s2_region_tag) && (region_hash_tag(s0_region_tag) === region_hash_tag(s2_region_tag)))
}
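// Lookup-latency sketch (an observation about the code above, not a spec):
// stream_lookup_req/resp forms a fixed-latency probe: S0 accepts the request,
// S1 does the parallel tag match, S2 reads the active bit, S3 drives
// stream_lookup_resp. The response carries no transaction id, so a requester
// such as the stride prefetcher presumably pipelines its own request context
// for three cycles and pairs responses with requests purely by this latency.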