/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.vector.Bundles._


class VSplitPipeline(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitPipelineIO(isVStore))
  // will be overridden later
  def us_whole_reg(fuOpType: UInt): Bool = false.B
  def us_mask(fuOpType: UInt): Bool = false.B
  def us_fof(fuOpType: UInt): Bool = false.B
  // TODO: vdIdxReg should no longer be useful; don't delete it for now
  val vdIdxReg = RegInit(0.U(3.W))

  val s1_ready = WireInit(false.B)
  io.in.ready := s1_ready

  /**-----------------------------------------------------------
    * s0 stage
    * decode and generate alignedType, uop mask, preIsSplit
    * ----------------------------------------------------------
    */
  val s0_vtype = io.in.bits.uop.vpu.vtype
  val s0_sew = s0_vtype.vsew
  val s0_eew = io.in.bits.uop.vpu.veew
  val s0_lmul = s0_vtype.vlmul
  // when loading a whole register or a unit-stride mask, emul should be 1
  val s0_fuOpType = io.in.bits.uop.fuOpType
  val s0_mop = s0_fuOpType(6, 5)
  val s0_nf = Mux(us_whole_reg(s0_fuOpType), 0.U, io.in.bits.uop.vpu.nf)
  val s0_vm = io.in.bits.uop.vpu.vm
  val s0_emul = Mux(us_whole_reg(s0_fuOpType), GenUSWholeEmul(io.in.bits.uop.vpu.nf), Mux(us_mask(s0_fuOpType), 0.U(mulBits.W), EewLog2(s0_eew) - s0_sew + s0_lmul))
  val s0_preIsSplit = !(isUnitStride(s0_mop) && !us_fof(s0_fuOpType))
  val s0_nfield = s0_nf +& 1.U

  val s0_valid = Wire(Bool())
  val s0_kill = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val s0_can_go = s1_ready
  val s0_fire = s0_valid && s0_can_go
  val s0_out = Wire(new VLSBundle(isVStore))

  val isUsWholeReg = isUnitStride(s0_mop) && us_whole_reg(s0_fuOpType)
  val isMaskReg = isUnitStride(s0_mop) && us_mask(s0_fuOpType)
  val isSegment = s0_nf =/= 0.U && !us_whole_reg(s0_fuOpType)
  val instType = Cat(isSegment, s0_mop)
  val uopIdx = io.in.bits.uop.vpu.vuopIdx
  val uopIdxInField = GenUopIdxInField(instType, s0_emul, s0_lmul, uopIdx)
  val vdIdxInField = GenVdIdxInField(instType, s0_emul, s0_lmul, uopIdxInField)
  val lmulLog2 = Mux(s0_lmul.asSInt >= 0.S, 0.U, s0_lmul)
  val emulLog2 = Mux(s0_emul.asSInt >= 0.S, 0.U, s0_emul)
  val numEewLog2 = emulLog2 - EewLog2(s0_eew)
  val numSewLog2 = lmulLog2 - s0_sew
  val numFlowsSameVdLog2 = Mux(
    isIndexed(instType),
    log2Up(VLENB).U - s0_sew(1, 0),
    log2Up(VLENB).U - s0_eew(1, 0)
  )
  // numUops = nfields * max(lmul, emul), where nfields = nf + 1
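  // Illustrative example (derived from the formula below, not from the code's own comments):
  //   vsetvli t0, t1, e8, m2   =>  lmul = 2 (log2 = 1)
  //   vluxei16.v v4, (a0), v8  =>  emul = eew/sew * lmul = 4 (log2 = 2), nf = 0
  //   numUops = (nf + 1) << emulLog2Pos = 1 << 2 = 4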
  val lmulLog2Pos = Mux(s0_lmul.asSInt < 0.S, 0.U, s0_lmul)
  val emulLog2Pos = Mux(s0_emul.asSInt < 0.S, 0.U, s0_emul)
  val numUops = Mux(
    isIndexed(s0_mop) && s0_lmul.asSInt > s0_emul.asSInt,
    (s0_nf +& 1.U) << lmulLog2Pos,
    (s0_nf +& 1.U) << emulLog2Pos
  )

  val vvl = io.in.bits.src_vl.asTypeOf(VConfig()).vl
  val evl = Mux(isUsWholeReg,
    GenUSWholeRegVL(io.in.bits.uop.vpu.nf +& 1.U, s0_eew),
    Mux(isMaskReg,
      GenUSMaskRegVL(vvl),
      vvl))
  val vvstart = io.in.bits.uop.vpu.vstart
  val alignedType = Mux(isIndexed(instType), s0_sew(1, 0), s0_eew(1, 0))
  val broadenAlignedType = Mux(s0_preIsSplit, Cat("b0".U, alignedType), "b100".U) // if unit-stride, use 128-bit memory accesses
  val flowsLog2 = GenRealFlowLog2(instType, s0_emul, s0_lmul, s0_eew, s0_sew)
  val flowsPrevThisUop = (uopIdxInField << flowsLog2).asUInt // # of flows before this uop in a field
  val flowsPrevThisVd = (vdIdxInField << numFlowsSameVdLog2).asUInt // # of flows before this vd in a field
  val flowsIncludeThisUop = ((uopIdxInField +& 1.U) << flowsLog2).asUInt // # of flows up to and including this uop
  val flowNum = io.in.bits.flowNum.get

  // For vector indexed instructions:
  //  When emul is greater than lmul, multiple uops correspond to one vd, e.g.:
  //    vsetvli t1,t0,e8,m1,ta,ma    lmul = 1
  //    vluxei16.v v2,(a0),v8        emul = 2
  //  In this case, the flow mask must be right-shifted by flowsPrevThisUop, while the
  //  mask passed to the merge buffer is right-shifted by flowsPrevThisVd, e.g.:
  //    vl = 9
  //    srcMask = 0x1FF
  //    uopIdxInField = 0 and vdIdxInField = 0, flowMask = 0x00FF, toMergeBuffMask = 0x01FF
  //    uopIdxInField = 1 and vdIdxInField = 0, flowMask = 0x0001, toMergeBuffMask = 0x01FF
  //    uopIdxInField = 2 and vdIdxInField = 1, flowMask = 0x0000, toMergeBuffMask = 0x0000
  //    uopIdxInField = 3 and vdIdxInField = 1, flowMask = 0x0000, toMergeBuffMask = 0x0000
  val isSpecialIndexed = isIndexed(instType) && s0_emul.asSInt > s0_lmul.asSInt

  val srcMask = GenFlowMask(Mux(s0_vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vvstart, evl, true)
  val srcMaskShiftBits = Mux(isSpecialIndexed, flowsPrevThisUop, flowsPrevThisVd)

  val flowMask = ((srcMask &
    UIntToMask(flowsIncludeThisUop.asUInt, VLEN + 1) &
    (~UIntToMask(flowsPrevThisUop.asUInt, VLEN)).asUInt
  ) >> srcMaskShiftBits)(VLENB - 1, 0)
  val indexedSrcMask = (srcMask >> flowsPrevThisVd).asUInt // only for indexed instructions
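  // Illustrative check against the example above (vl = 9, uopIdxInField = 1, flowsLog2 = 3):
  //   window = UIntToMask(16, VLEN + 1) & ~UIntToMask(8, VLEN) keeps flows [8, 15];
  //   (srcMask & window) >> flowsPrevThisUop = 0x100 >> 8 = 0x0001, matching flowMask in the table.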
  // Used to calculate the element index.
  // See 'splitbuffer' for 'io.out.splitIdxOffset' and 'mergebuffer' for 'merge data'
  val indexedSplitOffset = Mux(isSpecialIndexed, flowsPrevThisUop - flowsPrevThisVd, 0.U) // only for indexed instructions with emul > lmul
  val vlmax = GenVLMAX(s0_lmul, s0_sew)

  // connect
  s0_out := DontCare
  s0_out match { case x =>
    x.uop := io.in.bits.uop
    x.uop.vpu.vl := evl
    x.uop.uopIdx := uopIdx
    x.uop.numUops := numUops
    x.uop.lastUop := (uopIdx +& 1.U) === numUops
    x.uop.vpu.nf := s0_nf
    x.flowMask := flowMask
    x.indexedSrcMask := indexedSrcMask // only used by vector indexed instructions
    x.indexedSplitOffset := indexedSplitOffset
    x.byteMask := GenUopByteMask(flowMask, Cat("b0".U, alignedType))(VLENB - 1, 0)
    x.fof := isUnitStride(s0_mop) && us_fof(s0_fuOpType)
    x.baseAddr := io.in.bits.src_rs1
    x.stride := io.in.bits.src_stride
    x.flowNum := flowNum
    x.nfields := s0_nfield
    x.vm := s0_vm
    x.usWholeReg := isUsWholeReg
    x.usMaskReg := isMaskReg
    x.eew := s0_eew
    x.sew := s0_sew
    x.emul := s0_emul
    x.lmul := s0_lmul
    x.vlmax := Mux(isUsWholeReg, evl, vlmax)
    x.instType := instType
    x.data := io.in.bits.src_vs3
    x.vdIdxInField := vdIdxInField
    x.preIsSplit := s0_preIsSplit
    x.alignedType := broadenAlignedType
  }
  s0_valid := io.in.valid && !s0_kill
  /**-------------------------------------
    * s1 stage
    * ------------------------------------
    * generate uopOffset
    */
  val s1_valid = RegInit(false.B)
  val s1_kill = Wire(Bool())
  val s1_in = Wire(new VLSBundle(isVStore))
  val s1_can_go = io.out.ready && io.toMergeBuffer.resp.valid
  val s1_fire = s1_valid && !s1_kill && s1_can_go

  s1_ready := s1_kill || !s1_valid || (io.out.ready && io.toMergeBuffer.resp.valid)

  when(s0_fire) {
    s1_valid := true.B
  }.elsewhen(s1_fire) {
    s1_valid := false.B
  }.elsewhen(s1_kill) {
    s1_valid := false.B
  }
  s1_in := RegEnable(s0_out, s0_fire)

  val s1_flowNum = s1_in.flowNum
  val s1_uopidx = s1_in.uop.vpu.vuopIdx
  val s1_nf = s1_in.uop.vpu.nf
  val s1_nfields = s1_in.nfields
  val s1_eew = s1_in.eew
  val s1_emul = s1_in.emul
  val s1_lmul = s1_in.lmul
  val s1_instType = s1_in.instType
  val s1_stride = s1_in.stride
  val s1_vmask = FillInterleaved(8, s1_in.byteMask)(VLEN - 1, 0)
  val s1_alignedType = s1_in.alignedType
  val s1_isSpecialIndexed = isIndexed(s1_instType) && s1_emul.asSInt > s1_lmul.asSInt
  val s1_mask = Mux(s1_isSpecialIndexed, s1_in.indexedSrcMask, s1_in.flowMask)
  val s1_vdIdx = s1_in.vdIdxInField
  val s1_fof = s1_in.fof
  val s1_notIndexedStride = Mux( // stride for strided/unit-stride instructions
    isStrided(s1_instType),
    s1_stride(XLEN - 1, 0), // for strided load, stride = x[rs2]
    s1_nfields << s1_eew(1, 0) // for unit-stride load, stride = eew * NFIELDS
  )

  val stride = Mux(isIndexed(s1_instType), s1_stride, s1_notIndexedStride).asUInt // for indexed instructions, the index is applied at split time
  val uopOffset = genVUopOffset(s1_instType, s1_fof, s1_uopidx, s1_nf, s1_eew(1, 0), stride, s1_alignedType)
  val activeNum = Mux(s1_in.preIsSplit, PopCount(s1_in.flowMask), s1_flowNum)

  s1_kill := s1_in.uop.robIdx.needFlush(io.redirect)
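  // Merge-buffer handshake: the request below is tied to s1_fire, and s1 can
  // only fire once the merge buffer grants an entry (resp.valid), so a request
  // and its entry allocation always happen in the same cycle.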
  // query mergeBuffer
  io.toMergeBuffer.req.valid := s1_fire // only a uop that can go will get a MergeBuffer entry
  io.toMergeBuffer.req.bits.flowNum := activeNum
  io.toMergeBuffer.req.bits.data := s1_in.data
  io.toMergeBuffer.req.bits.uop := s1_in.uop
  io.toMergeBuffer.req.bits.mask := s1_mask
  io.toMergeBuffer.req.bits.vaddr := DontCare
  io.toMergeBuffer.req.bits.vdIdx := s1_vdIdx // TODO: vdIdxReg should no longer be useful; don't delete it for now
  io.toMergeBuffer.req.bits.fof := s1_in.fof
  io.toMergeBuffer.req.bits.vlmax := s1_in.vlmax
//  io.toMergeBuffer.req.bits.vdOffset :=

  // TODO: vdIdxReg should no longer be useful; don't delete it for now
//  when (s1_in.uop.lastUop && s1_fire || s1_kill) {
//    vdIdxReg := 0.U
//  }.elsewhen(s1_fire) {
//    vdIdxReg := vdIdxReg + 1.U
//    XSError(vdIdxReg + 1.U === 0.U, s"Overflow! The number of vd should be less than 8\n")
//  }
  // out connect
  io.out.valid := s1_valid && io.toMergeBuffer.resp.valid && (activeNum =/= 0.U) // if activeNum == 0, this uop does nothing and can be killed
  io.out.bits := s1_in
  io.out.bits.uopOffset := uopOffset
  io.out.bits.stride := stride
  io.out.bits.mBIndex := io.toMergeBuffer.resp.bits.mBIndex

  XSPerfAccumulate("split_out", io.out.fire)
  XSPerfAccumulate("pipe_block", io.out.valid && !io.out.ready)
  XSPerfAccumulate("mbuffer_block", s1_valid && io.out.ready && !io.toMergeBuffer.resp.valid)
}

abstract class VSplitBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitBufferIO(isVStore))

  val bufferSize: Int
  private val freeWidth = Seq(io.out).length
  private val allocWidth = Seq(io.in).length

  // freelist
  val freeList = Module(new FreeList(
    size = bufferSize,
    allocWidth = allocWidth,
    freeWidth = freeWidth,
    enablePreAlloc = false,
    moduleName = "VSplit Buffer freelist"
  ))

  val uopq = Reg(Vec(bufferSize, new VLSBundle(isVStore)))
  val allocated = RegInit(VecInit(Seq.fill(bufferSize)(false.B)))
  val allocatedWire = WireInit(VecInit(Seq.fill(bufferSize)(false.B))) // combinational view of allocated, for back-to-back split selection
  val freeMask = WireInit(VecInit(Seq.fill(bufferSize)(false.B)))
  val needCancel = WireInit(VecInit(Seq.fill(bufferSize)(false.B)))
  val activeIssue = Wire(Bool())
  val inActiveIssue = Wire(Bool())

  // for split
  val splitIdx = RegInit(0.U(flowIdxBits.W))
  val strideOffsetReg = RegInit(0.U(VLEN.W))

  /**
    * Redirect
    */
  val cancelEnq = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val canEnqueue = io.in.valid
  val needEnqueue = canEnqueue && !cancelEnq

  // enqueue
  freeList.io.doAllocate.head := false.B
  freeList.io.allocateReq.head := true.B
  val offset = PopCount(needEnqueue)
  val canAccept = freeList.io.canAllocate(offset)
  val enqIndex = freeList.io.allocateSlot(offset)
  io.in.ready := canAccept
  val doEnqueue = canAccept && needEnqueue

  when(doEnqueue) {
    freeList.io.doAllocate.head := true.B
    uopq(enqIndex) := io.in.bits
  }
  freeList.io.free := freeMask.asUInt

  // select one uop
  val selPolicy = SelectOne("circ", allocatedWire, freeWidth) // select one entry to split
  val (selValid, selOHVec) = selPolicy.getNthOH(1)
  val entryIdx = OHToUInt(selOHVec)

  /* latch the selected entry; wait for split finish or redirect */
  val splitFinish = WireInit(false.B)
  val selValidReg = RegInit(false.B)
  val selIdxReg = Reg(UInt(entryIdx.getWidth.W))

  // 0 -> 1
  when(selValid && !selValidReg) {
    selValidReg := true.B
  }
  // 1 -> 0
  when((uopq(selIdxReg).uop.robIdx.needFlush(io.redirect) ||
        (!selValid && splitFinish && (activeIssue || inActiveIssue))) &&
       selValidReg) {
    selValidReg := false.B
  }
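  // Summary: selValidReg tracks whether a latched entry is currently being
  // split. It is set when a new entry is selected, and cleared on redirect or
  // when the current entry finishes splitting with no successor selected.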
  // a new uop needs splitting and the last uop has finished splitting
  when((selValid && !selValidReg) ||
       (selValid && selValidReg && splitFinish && (activeIssue || inActiveIssue))) {
    selIdxReg := entryIdx
  }

  // split uops
  val issueValid = allocated(selIdxReg) && selValidReg
  val issueEntry = uopq(selIdxReg)
  val issueMbIndex = issueEntry.mBIndex
  val issueFlowNum = issueEntry.flowNum
  val issueBaseAddr = issueEntry.baseAddr
  val issueUop = issueEntry.uop
  val issueUopIdx = issueUop.vpu.vuopIdx
  val issueInstType = issueEntry.instType
  val issueUopOffset = issueEntry.uopOffset
  val issueEew = issueEntry.eew
  val issueSew = issueEntry.sew
  val issueLmul = issueEntry.lmul
  val issueEmul = issueEntry.emul
  val issueAlignedType = issueEntry.alignedType
  val issuePreIsSplit = issueEntry.preIsSplit
  val issueByteMask = issueEntry.byteMask
  val issueVLMAXMask = issueEntry.vlmax - 1.U
  val issueIsWholeReg = issueEntry.usWholeReg
  val issueVLMAXLog2 = GenVLMAXLog2(issueEntry.lmul, issueSew)
  val elemIdx = GenElemIdx(
    instType = issueInstType,
    emul = issueEmul,
    lmul = issueLmul,
    eew = issueEew,
    sew = issueSew,
    uopIdx = issueUopIdx,
    flowIdx = splitIdx
  ) // element index within the instruction, used for exception reporting

  val splitIdxOffset = issueEntry.indexedSplitOffset + splitIdx

  val elemIdxInsideField = elemIdx & issueVLMAXMask
  val indexFlowInnerIdx = ((elemIdxInsideField << issueEew(1, 0))(vOffsetBits - 1, 0) >> issueEew(1, 0)).asUInt
  val nfIdx = Mux(issueIsWholeReg, 0.U, elemIdx >> issueVLMAXLog2)
  val fieldOffset = nfIdx << issueAlignedType // field offset inside a segment

  val indexedStride = IndexAddr( // index for indexed instructions
    index = issueEntry.stride,
    flow_inner_idx = indexFlowInnerIdx,
    eew = issueEew
  )
  val issueStride = Mux(isIndexed(issueInstType), indexedStride, strideOffsetReg)
  val vaddr = issueBaseAddr + issueUopOffset + issueStride
  val mask = genVWmask128(vaddr, issueAlignedType) // scalar mask for this flow
  val flowMask = issueEntry.flowMask
  val vecActive = (flowMask & UIntToOH(splitIdx)).orR
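  // vecActive: whether the element at splitIdx is unmasked. For non-unit-stride
  // uops, masked-off elements are drained via inActiveIssue instead of being
  // sent to the pipeline; unit-stride flows are always treated as active.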
  /*
   * Unit-stride is split into one or two flows:
   * if the uop's address is 128-bit aligned, split it into one flow; otherwise split it into two.
   */

  val usAligned128 = (vaddr(3, 0) === 0.U) // addr is 128-bit aligned
  val usSplitMask = genUSSplitMask(issueByteMask, splitIdx, vaddr(3, 0))
  val usNoSplit = (usAligned128 || !(vaddr(3, 0) +& PopCount(usSplitMask))(4)) && !issuePreIsSplit && (splitIdx === 0.U) // this unit-stride uop does not need to be split into two flows
  val usSplitVaddr = genUSSplitAddr(vaddr, splitIdx)
  val regOffset = vaddr(3, 0) // offset in the 256-bit vd
  XSError((splitIdx > 1.U && usNoSplit) || (splitIdx > 1.U && !issuePreIsSplit), "Unit-Stride addr split error!\n")

  // data
  io.out.bits match { case x =>
    x.uop := issueUop
    x.vaddr := Mux(!issuePreIsSplit, usSplitVaddr, vaddr)
    x.alignedType := issueAlignedType
    x.isvec := true.B
    x.mask := Mux(!issuePreIsSplit, usSplitMask, mask)
    x.reg_offset := regOffset // for merging unit-stride data
    x.vecActive := Mux(!issuePreIsSplit, true.B, vecActive) // currently, unit-stride flows are always sent to the pipeline
    x.is_first_ele := DontCare
    x.usSecondInv := usNoSplit
    x.elemIdx := elemIdx
    x.elemIdxInsideVd := splitIdxOffset // for unit-stride, this is the index of the (up to) two split memory requests (for data merging)
    x.uop_unit_stride_fof := DontCare
    x.isFirstIssue := DontCare
    x.mBIndex := issueMbIndex
  }

  // redirect
  for (i <- 0 until bufferSize) {
    needCancel(i) := uopq(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
  }

  /* Execute logic */
  /** Issue to scalar pipeline **/
  val allowIssue = io.out.ready
  val issueCount = Mux(usNoSplit, 2.U, (PopCount(inActiveIssue) + PopCount(activeIssue))) // a no-split unit-stride uop counts as two flows
  splitFinish := splitIdx >= (issueFlowNum - issueCount)

  // handshake
  activeIssue := issueValid && allowIssue && (vecActive || !issuePreIsSplit) // active issue; currently used by non-unit-stride
  inActiveIssue := issueValid && !vecActive && issuePreIsSplit
  when(!issueEntry.uop.robIdx.needFlush(io.redirect)) {
    when(!splitFinish) {
      when(activeIssue || inActiveIssue) {
        // the uop has not been entirely split yet
        splitIdx := splitIdx + issueCount
        strideOffsetReg := Mux(!issuePreIsSplit, strideOffsetReg, strideOffsetReg + issueEntry.stride) // plain unit-stride does not use strideOffsetReg
      }
    }.otherwise {
      when(activeIssue || inActiveIssue) {
        // the uop is done splitting
        splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
        strideOffsetReg := 0.U
      }
    }
  }.otherwise {
    splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
    strideOffsetReg := 0.U
  }
  // allocatedWire: only used by the freelist to select the next uop back-to-back
  for (i <- 0 until bufferSize) {
    when(needCancel(i)) { // redirect
      allocatedWire(i) := false.B
    }.elsewhen(splitFinish && (activeIssue || inActiveIssue) && (i.U === selIdxReg)) { // finish
      allocatedWire(i) := false.B
    }.otherwise {
      allocatedWire(i) := allocated(i)
    }
  }
  // allocated
  for (i <- 0 until bufferSize) {
    when(needCancel(i)) { // redirect
      allocated(i) := false.B
    }.elsewhen(splitFinish && (activeIssue || inActiveIssue) && (i.U === selIdxReg)) { // dequeue
      allocated(i) := false.B
    }.elsewhen(doEnqueue && (i.U === enqIndex)) {
      allocated(i) := true.B
    }
  }
  // freeMask
  for (i <- 0 until bufferSize) {
    when(needCancel(i)) { // redirect
      freeMask(i) := true.B
    }.elsewhen(splitFinish && (activeIssue || inActiveIssue) && (i.U === selIdxReg)) { // dequeue
      freeMask(i) := true.B
    }.otherwise {
      freeMask(i) := false.B
    }
  }
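  // Entry lifecycle: a slot is allocated on enqueue and released either on
  // redirect (needCancel) or in the cycle its last flow issues (splitFinish
  // with an issue handshake); freeMask returns the slot to the freelist in
  // the same cycle allocated is cleared.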
  // out connect
  io.out.valid := issueValid && (vecActive || !issuePreIsSplit) // TODO: do not send inactive unit-stride uops to the pipeline

  XSError(!allocated(entryIdx) && selValid, "select invalid entry!")

  XSPerfAccumulate("out_valid", io.out.valid)
  XSPerfAccumulate("out_fire", io.out.fire)
  XSPerfAccumulate("out_fire_unitstride", io.out.fire && !issuePreIsSplit)
  XSPerfAccumulate("unitstride_vlenAlign", io.out.fire && !issuePreIsSplit && io.out.bits.vaddr(3, 0) === 0.U)
  XSPerfAccumulate("unitstride_invalid", io.out.ready && issueValid && !issuePreIsSplit && PopCount(io.out.bits.mask).orR)

  QueuePerf(bufferSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}

class VSSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = true) {
  override lazy val bufferSize = SplitBufferSize
  // split data
  val splitData = genVSData(
    data = issueEntry.data.asUInt,
    elemIdx = splitIdxOffset,
    alignedType = issueAlignedType
  )
  val flowData = genVWdata(splitData, issueAlignedType)
  val usSplitData = genUSSplitData(issueEntry.data.asUInt, splitIdx, vaddr(3, 0))

  val sqIdx = issueUop.sqIdx + splitIdx
  io.out.bits.uop.sqIdx := sqIdx

  // send data to the store queue (sq)
  val vstd = io.vstd.get
  vstd.valid := issueValid && (vecActive || !issuePreIsSplit)
  vstd.bits.uop := issueUop
  vstd.bits.uop.sqIdx := sqIdx
  vstd.bits.data := Mux(!issuePreIsSplit, usSplitData, flowData)
  vstd.bits.debug := DontCare
  vstd.bits.vdIdx.get := DontCare
  vstd.bits.vdIdxInField.get := DontCare
  vstd.bits.mask.get := Mux(!issuePreIsSplit, usSplitMask, mask)
}

class VLSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = false) {
  override lazy val bufferSize = SplitBufferSize
  io.out.bits.uop.lqIdx := issueUop.lqIdx + splitIdx
}

class VSSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = true) {
  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VstuType.vsr
  override def us_mask(fuOpType: UInt): Bool = fuOpType === VstuType.vsm
  override def us_fof(fuOpType: UInt): Bool = false.B // there is no vector fof store
}

class VLSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = false) {
  override def us_whole_reg(fuOpType: UInt): Bool = fuOpType === VlduType.vlr
  override def us_mask(fuOpType: UInt): Bool = fuOpType === VlduType.vlm
  override def us_fof(fuOpType: UInt): Bool = fuOpType === VlduType.vleff
}

class VLSplitImp(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitIO(isVStore = false))
  val splitPipeline = Module(new VLSplitPipelineImp())
  val splitBuffer = Module(new VLSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // Split Buffer
  splitBuffer.io.in <> splitPipeline.io.out
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
}
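// The store-side top level below mirrors VLSplitImp, but additionally forwards
// the split store data (vstd) from the split buffer to the store queue.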
class VSSplitImp(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitIO(isVStore = true))
  val splitPipeline = Module(new VSSplitPipelineImp())
  val splitBuffer = Module(new VSSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // Split Buffer
  splitBuffer.io.in <> splitPipeline.io.out
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
  io.vstd.get <> splitBuffer.io.vstd.get
}