package xiangshan.backend.fu.wrapper

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils.XSError
import xiangshan.backend.fu.FuConfig
import xiangshan.backend.fu.vector.Bundles.{VLmul, VSew, ma}
import xiangshan.backend.fu.vector.utils.VecDataSplitModule
import xiangshan.backend.fu.vector.{Mgu, Mgtu, VecInfo, VecPipedFuncUnit}
import xiangshan.ExceptionNO
import yunsuan.{VfaluType, VfpuType}
import yunsuan.vector.VectorFloatAdder

class VFAlu(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfalu OpType not supported")

  // params alias
  private val dataWidth = cfg.dataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule

  // io alias
  private val opcode   = fuOpType(4, 0)
  private val resWiden = fuOpType(5)
  private val opbWiden = fuOpType(6)

  // modules
  private val vfalus = Seq.fill(numVecModule)(Module(new VectorFloatAdder))
  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val oldVdSplit = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu = Module(new Mgu(dataWidth))
  private val mgtu = Module(new Mgtu(dataWidth))

  /**
    * Input connections of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * Input connections of [[vfalus]]
    */
  // Vec(vs2(31,0), vs2(63,32), vs2(95,64), vs2(127,96)) ==>
  // Vec(
  //   Cat(vs2(95,64),  vs2(31,0)),
  //   Cat(vs2(127,96), vs2(63,32)),
  // )
  private val vs2GroupedVec: Vec[UInt] = VecInit(vs2Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val vs1GroupedVec: Vec[UInt] = VecInit(vs1Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val resultData = Wire(Vec(numVecModule, UInt(dataWidthOfDataModule.W)))
  private val fflagsData = Wire(Vec(numVecModule, UInt(20.W)))
  private val srcMaskRShiftForReduction = Wire(UInt((8 * numVecModule).W))
  // for reduction
  val isFirstGroupUop = vuopIdx === 0.U ||
    (vuopIdx === 1.U && (vlmul === VLmul.m4 || vlmul === VLmul.m8)) ||
    ((vuopIdx === 2.U || vuopIdx === 3.U) && vlmul === VLmul.m8)
  val maskRshiftWidthForReduction = Wire(UInt(6.W))
  maskRshiftWidthForReduction := Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
    vuopIdx,
    Mux1H(Seq(
      (vsew === VSew.e16) -> (vuopIdx(1, 0) << 4),
      (vsew === VSew.e32) -> (vuopIdx(1, 0) << 3),
      (vsew === VSew.e64) -> (vuopIdx(1, 0) << 2),
    ))
  )
  val vlMaskForReduction = (~(Fill(VLEN, 1.U) << vl)).asUInt
  srcMaskRShiftForReduction := ((srcMask & vlMaskForReduction) >> maskRshiftWidthForReduction)(8 * numVecModule - 1, 0)
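  // Worked example for genMaskForReduction (a sketch, assuming VLEN = 128 and
  // hence numVecModule = 2; not part of the original comments): each lane gets
  // an 8-bit reduction mask whose low nibble enables fp_a (vs2) elements and
  // whose high nibble enables fp_b (vs1) elements. With sew = e32, no folding,
  // a first-group uop and lane i = 0, the mask is
  //   Cat(0.U(2.W), f32Mask(5, 4), 0.U(2.W), f32Mask(1, 0)),
  // i.e. vs2 elements 0..1 land in bits 1:0 and vs1 elements 4..5 in bits 5:4.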
  def genMaskForReduction(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64 * 2
    val f32MaskNum = dataWidth / 32 * 2
    val f16MaskNum = dataWidth / 16 * 2
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    // vs2 is reordered, so the unordered first-fold masks take their bits from the high half
    val f64FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(0), 0.U(3.W), f64Mask(1)),
      )
    )
    val f64FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(1), 0.U(3.W), f64Mask(0))
      )
    )
    val f32FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(1), f32Mask(0), 0.U(2.W), f32Mask(3), f32Mask(2)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(0), 0.U(3.W), f32Mask(1)),
      )
    )
    val f32FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(3), f32Mask(2), 0.U(2.W), f32Mask(1), f32Mask(0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(1), 0.U(3.W), f32Mask(0)),
      )
    )
    val f16FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7, 4), f16Mask(3, 0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(1), f16Mask(0), 0.U(2.W), f16Mask(3), f16Mask(2)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(0), 0.U(3.W), f16Mask(1)),
      )
    )
    val f16FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7, 4), f16Mask(3, 0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(3), f16Mask(2), 0.U(2.W), f16Mask(1), f16Mask(0)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(1), 0.U(3.W), f16Mask(0)),
      )
    )
    val f64FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00010001".U,
      )
    )
    val f32FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00010001".U,
      )
    )
    val f16FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b11111111".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_8 -> "b00010001".U,
      )
    )
    // low 4 bits for vs2 (fp_a), high 4 bits for vs1 (fp_b)
    val isFold = vecCtrl.fpu.isFoldTo1_2 || vecCtrl.fpu.isFoldTo1_4 || vecCtrl.fpu.isFoldTo1_8
    val f64FirstNotFoldMask = Cat(0.U(3.W), f64Mask(i + 2), 0.U(3.W), f64Mask(i))
    val f32FirstNotFoldMask = Cat(0.U(2.W), f32Mask(i * 2 + 5, i * 2 + 4), 0.U(2.W), f32Mask(i * 2 + 1, i * 2))
    val f16FirstNotFoldMask = Cat(f16Mask(i * 4 + 11, i * 4 + 8), f16Mask(i * 4 + 3, i * 4))
    val f64MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f64FirstFoldMaskOrder, f64FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f64FirstFoldMaskUnorder, f64FirstNotFoldMask),
        Mux(isFold, f64FoldMask, Fill(8, 1.U))))
    val f32MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f32FirstFoldMaskOrder, f32FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f32FirstFoldMaskUnorder, f32FirstNotFoldMask),
        Mux(isFold, f32FoldMask, Fill(8, 1.U))))
    val f16MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f16FirstFoldMaskOrder, f16FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f16FirstFoldMaskUnorder, f16FirstNotFoldMask),
        Mux(isFold, f16FoldMask, Fill(8, 1.U))))
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum, outMask(0), outMask)
  }
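  // Worked example for genMaskForMerge (a sketch, assuming VLEN = 128): each
  // 64-bit lane i covers one e64, two e32, or four e16 elements, so its 4-bit
  // merge mask is cut out of the (already right-shifted) source mask as
  //   e64: Cat(0.U(3.W), f64Mask(i))           // i = 1 -> bit 1
  //   e32: Cat(0.U(2.W), f32Mask(2i+1, 2i))    // i = 1 -> bits 3:2
  //   e16: f16Mask(4i+3, 4i)                   // i = 1 -> bits 7:4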
  def genMaskForMerge(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64
    val f32MaskNum = dataWidth / 32
    val f16MaskNum = dataWidth / 16
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    val f64MaskI = Cat(0.U(3.W), f64Mask(i))
    val f32MaskI = Cat(0.U(2.W), f32Mask(2 * i + 1, 2 * i))
    val f16MaskI = f16Mask(4 * i + 3, 4 * i)
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    outMask
  }

  val isScalarMove = (fuOpType === VfaluType.vfmv_f_s) || (fuOpType === VfaluType.vfmv_s_f)
  val srcMaskRShift = Wire(UInt((4 * numVecModule).W))
  val maskRshiftWidth = Wire(UInt(6.W))
  maskRshiftWidth := Mux1H(
    Seq(
      (vsew === VSew.e16) -> (vuopIdx(2, 0) << 3),
      (vsew === VSew.e32) -> (vuopIdx(2, 0) << 2),
      (vsew === VSew.e64) -> (vuopIdx(2, 0) << 1),
    )
  )
  srcMaskRShift := (srcMask >> maskRshiftWidth)(4 * numVecModule - 1, 0)
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  vfalus.zipWithIndex.foreach {
    case (mod, i) =>
      mod.io.fire := io.in.valid
      mod.io.fp_a := Mux(opbWiden, vs1Split.io.outVec64b(i), vs2Split.io.outVec64b(i)) // very dirty TODO
      mod.io.fp_b := Mux(opbWiden, vs2Split.io.outVec64b(i), vs1Split.io.outVec64b(i)) // very dirty TODO
      mod.io.widen_a := Mux(opbWiden, Cat(vs1Split.io.outVec32b(i + numVecModule), vs1Split.io.outVec32b(i)), Cat(vs2Split.io.outVec32b(i + numVecModule), vs2Split.io.outVec32b(i)))
      mod.io.widen_b := Mux(opbWiden, Cat(vs2Split.io.outVec32b(i + numVecModule), vs2Split.io.outVec32b(i)), Cat(vs1Split.io.outVec32b(i + numVecModule), vs1Split.io.outVec32b(i)))
      mod.io.frs1 := 0.U // already vf -> vv
      mod.io.is_frs1 := false.B // already vf -> vv
      mod.io.mask := Mux(isScalarMove, !vuopIdx.orR, genMaskForMerge(inmask = srcMaskRShift, sew = vsew, i = i))
      mod.io.maskForReduction := genMaskForReduction(inmask = srcMaskRShiftForReduction, sew = vsew, i = i)
      mod.io.uop_idx := Mux(fuOpType === VfaluType.vfwredosum, 0.U, vuopIdx(0))
      mod.io.is_vec := true.B // Todo
      mod.io.round_mode := frm
      mod.io.fp_format := Mux(resWiden, vsew + 1.U, vsew)
      mod.io.opb_widening := opbWiden || (fuOpType === VfaluType.vfwredosum)
      mod.io.res_widening := resWiden
      mod.io.op_code := opcode
      resultData(i) := mod.io.fp_result
      fflagsData(i) := mod.io.fflags
      fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs2Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs2Split.io.outVec64b(i).head(48).andR))
      )
      fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs1Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs1Split.io.outVec64b(i).head(48).andR))
      )
      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
  }
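  // Compare results for mask-destination instructions, illustrated (a sketch,
  // assuming VLEN = 128): each lane leaves one boolean per element in the LSB
  // of that element's slot of resultDataUInt, so for vsew = e32 the gather
  // below yields cmpResult(i) = resultDataUInt(32 * i) for i < 4 and false.B
  // for the upper granules, since only dataWidth / 32 elements exist.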
  val resultDataUInt = resultData.asUInt
  val cmpResultWidth = dataWidth / 16
  val cmpResult = Wire(Vec(cmpResultWidth, Bool()))
  for (i <- 0 until cmpResultWidth) {
    if (i == 0) {
      cmpResult(i) := resultDataUInt(0)
    }
    else if (i < dataWidth / 64) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> resultDataUInt(i * 64)
        )
      )
    }
    else if (i < dataWidth / 32) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> false.B
        )
      )
    }
    else if (i < dataWidth / 16) {
      cmpResult(i) := Mux(outVecCtrl.vsew === 1.U, resultDataUInt(i * 16), false.B)
    }
  }

  val outEew = Mux(RegEnable(resWiden, io.in.fire), outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
  val outVuopidx = outVecCtrl.vuopIdx(2, 0)
  val vlMax = ((VLEN / 8).U >> outEew).asUInt
  val lmulAbs = Mux(outVecCtrl.vlmul(2), (~outVecCtrl.vlmul(1, 0)).asUInt + 1.U, outVecCtrl.vlmul(1, 0))
  // vfmv_f_s needs vl = 1, the last uop of a reduction needs vl = 1, other uops need vl = vlmax
  val numOfUopVFRED = {
    // addTime includes the extra add of frs1
    val addTime = MuxLookup(outVecCtrl.vlmul, 1.U(4.W), Array(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(outVecCtrl.vsew, "b000".U, Array(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1:  foldTime = vlmul - foldLastVlmul
    // lmul >= 1: foldTime = 0.U  - foldLastVlmul
    val foldTime = Mux(outVecCtrl.vlmul(2), outVecCtrl.vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  val reductionVl = Mux((outVecCtrl.vuopIdx === numOfUopVFRED - 1.U) || (outCtrl.fuOpType === VfaluType.vfredosum || outCtrl.fuOpType === VfaluType.vfwredosum), 1.U, vlMax)
  val outIsReduction = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin ||
    outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum
  val outVlFix = Mux(
    outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s),
    1.U,
    Mux(
      outCtrl.fuOpType === VfaluType.vfmv_s_f,
      outVl.orR,
      Mux(outIsReduction, reductionVl, outVl)
    )
  )
  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  val vlSetThisUop = Mux(outVlFix > outVuopidx * vlMaxThisUop, outVlFix - outVuopidx * vlMaxThisUop, 0.U)
  val vlThisUop = Wire(UInt(3.W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)

  private val needNoMask = outCtrl.fuOpType === VfaluType.vfmerge ||
    outCtrl.fuOpType === VfaluType.vfmv_s_f ||
    outIsReduction ||
    outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
  val allFFlagsEn = Wire(Vec(4 * numVecModule, Bool()))
  val outSrcMaskRShift = Wire(UInt((4 * numVecModule).W))
  outSrcMaskRShift := (maskToMgu >> (outVecCtrl.vuopIdx(2, 0) * vlMax))(4 * numVecModule - 1, 0)
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule) {
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2 * i + 1, 2 * i))
  }
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule) {
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
  }
  val fflagsEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16FFlagsEn.asUInt,
      (outEew === 2.U) -> f32FFlagsEn.asUInt,
      (outEew === 3.U) -> f64FFlagsEn.asUInt
    )
  )
  allFFlagsEn := Mux(outIsReduction, Fill(4 * numVecModule, 1.U), fflagsEn & vlMaskRShift).asTypeOf(allFFlagsEn)
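  // fflags gathering, illustrated (a sketch, assuming VLEN = 128 so
  // numVecModule = 2): each VectorFloatAdder lane reports 20 fflag bits,
  // i.e. 4 elements x 5 bits, so allFFlags below is viewed as 4 * numVecModule
  // 5-bit groups. allFFlagsEn keeps only active elements (masked-on and below
  // vl), except for reductions, where every element's flags are accumulated.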
  val allFFlags = fflagsData.asTypeOf(Vec(4 * numVecModule, UInt(5.W)))
  val outFFlags = allFFlagsEn.zip(allFFlags).map {
    case (en, fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)
  io.out.bits.res.fflags.get := outFFlags

  val cmpResultOldVd = Wire(UInt(cmpResultWidth.W))
  val cmpResultOldVdRshiftWidth = Wire(UInt(6.W))
  cmpResultOldVdRshiftWidth := Mux1H(
    Seq(
      (outVecCtrl.vsew === VSew.e16) -> (outVecCtrl.vuopIdx(2, 0) << 3),
      (outVecCtrl.vsew === VSew.e32) -> (outVecCtrl.vuopIdx(2, 0) << 2),
      (outVecCtrl.vsew === VSew.e64) -> (outVecCtrl.vuopIdx(2, 0) << 1),
    )
  )
  cmpResultOldVd := (outOldVd >> cmpResultOldVdRshiftWidth)(4 * numVecModule - 1, 0)
  val cmpResultForMgu = Wire(Vec(cmpResultWidth, Bool()))
  private val maxVdIdx = 8
  private val elementsInOneUop = Mux1H(
    Seq(
      (outEew === 1.U) -> cmpResultWidth.U(4.W),
      (outEew === 2.U) -> (cmpResultWidth / 2).U(4.W),
      (outEew === 3.U) -> (cmpResultWidth / 4).U(4.W),
    )
  )
  private val vdIdx = outVecCtrl.vuopIdx(2, 0)
  private val elementsComputed = Mux1H(Seq.tabulate(maxVdIdx)(i => (vdIdx === i.U) -> (elementsInOneUop * i.U)))
  for (i <- 0 until cmpResultWidth) {
    val cmpResultWithVmask = Mux(outSrcMaskRShift(i), cmpResult(i), Mux(outVecCtrl.vma, true.B, cmpResultOldVd(i)))
    cmpResultForMgu(i) := Mux(elementsComputed +& i.U >= outVl, true.B, cmpResultWithVmask)
  }
  val outIsFold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
  val outOldVdForREDO = Mux1H(Seq(
    (outVecCtrl.vsew === VSew.e16) -> (outOldVd >> 16),
    (outVecCtrl.vsew === VSew.e32) -> (outOldVd >> 32),
    (outVecCtrl.vsew === VSew.e64) -> (outOldVd >> 64),
  ))
  val outOldVdForWREDO = Mux(
    !outIsFold,
    Mux(outVecCtrl.vsew === VSew.e16, Cat(outOldVd(VLEN - 1 - 16, 16), 0.U(32.W)), Cat(outOldVd(VLEN - 1 - 32, 32), 0.U(64.W))),
    Mux(outVecCtrl.vsew === VSew.e16,
      // vuopIdx mod 8 equals 1
      Mux(outVecCtrl.vuopIdx(2, 0) === 1.U, outOldVd, outOldVd >> 16),
      // vuopIdx mod 4 equals 1
      Mux(outVecCtrl.vuopIdx(1, 0) === 1.U, outOldVd, outOldVd >> 32)
    ),
  )
  val outOldVdForRED = Mux(outCtrl.fuOpType === VfaluType.vfredosum, outOldVdForREDO, outOldVdForWREDO)
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(outVecCtrl.vsew, 0.U, Array(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Mux(outVecCtrl.vlmul(2), uvlMax >> (-outVecCtrl.vlmul)(1, 0), uvlMax << outVecCtrl.vlmul(1, 0)).asUInt
    vlMax
  }
  val isOutOldVdForREDO = (outCtrl.fuOpType === VfaluType.vfredosum && outIsFold) || outCtrl.fuOpType === VfaluType.vfwredosum
  val taIsFalseForVFREDO = ((outCtrl.fuOpType === VfaluType.vfredosum) || (outCtrl.fuOpType === VfaluType.vfwredosum)) && (outVecCtrl.vuopIdx =/= numOfUopVFREDOSUM - 1.U)
  // outVecCtrl.fpu.isFpToVecInst means this is a scalar float instruction executed in the vector unit, not a vector float instruction
  val notUseVl = outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s)
  val notModifyVd = !notUseVl && (outVl === 0.U)
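  // Mgu/Mgtu connection below: Mgu merges the raw result with oldVd under the
  // mask/tail policy (vta/vma, vl, vstart, vdIdx), and Mgtu then clears the
  // bits beyond vl when the destination is a mask register. Illustrative
  // sketch (an assumption, with VLEN = 128): for a masked e32 vfadd with
  // vl = 2 and vta/vma = 0, elements 0..1 take the adder result where v0 is
  // set and keep oldVd where it is clear, while elements 2..3 keep oldVd.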
  mgu.io.in.vd := Mux(outVecCtrl.isDstMask, Cat(0.U((dataWidth / 16 * 15).W), cmpResultForMgu.asUInt), resultDataUInt)
  mgu.io.in.oldVd := Mux(isOutOldVdForREDO, outOldVdForRED, outOldVd)
  mgu.io.in.mask := maskToMgu
  mgu.io.in.info.ta := Mux(outCtrl.fuOpType === VfaluType.vfmv_f_s, true.B, Mux(taIsFalseForVFREDO, false.B, outVecCtrl.vta))
  mgu.io.in.info.ma := Mux(outCtrl.fuOpType === VfaluType.vfmv_s_f, true.B, outVecCtrl.vma)
  mgu.io.in.info.vl := outVlFix
  mgu.io.in.info.vlmul := outVecCtrl.vlmul
  mgu.io.in.info.valid := Mux(notModifyVd, false.B, io.in.valid)
  mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew := outEew
  mgu.io.in.info.vsew := outVecCtrl.vsew
  mgu.io.in.info.vdIdx := Mux(outIsReduction, 0.U, outVecCtrl.vuopIdx)
  mgu.io.in.info.narrow := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask
  mgu.io.in.isIndexedVls := false.B
  mgtu.io.in.vd := Mux(outVecCtrl.isDstMask, mgu.io.out.vd, resultDataUInt)
  mgtu.io.in.vl := outVl
  val resultFpMask = Wire(UInt(VLEN.W))
  val isFclass = outVecCtrl.fpu.isFpToVecInst && (outCtrl.fuOpType === VfaluType.vfclass)
  val fpCmpFuOpType = Seq(VfaluType.vfeq, VfaluType.vflt, VfaluType.vfle)
  val isCmp = outVecCtrl.fpu.isFpToVecInst && (fpCmpFuOpType.map(_ === outCtrl.fuOpType).reduce(_ | _))
  resultFpMask := Mux(isFclass || isCmp, Fill(16, 1.U(1.W)), Fill(VLEN, 1.U(1.W)))
  // when the destination is a mask, the result must also be tail-cleared by mgtu
  io.out.bits.res.data := Mux(notModifyVd, outOldVd, Mux(outVecCtrl.isDstMask, mgtu.io.out.vd, mgu.io.out.vd) & resultFpMask)
  io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal

}

class VFMgu(vlen: Int)(implicit p: Parameters) extends Module {
  val io = IO(new VFMguIO(vlen))

  val vd = io.in.vd
  val oldvd = io.in.oldVd
  val mask = io.in.mask
  val vsew = io.in.info.eew
  val num16bits = vlen / 16

  // This class is a skeleton: the merge logic is not implemented yet. Drive
  // the output with a pass-through so the module elaborates (placeholder only).
  io.out.vd := vd
}

class VFMguIO(vlen: Int)(implicit p: Parameters) extends Bundle {
  val in = new Bundle {
    val vd = Input(UInt(vlen.W))
    val oldVd = Input(UInt(vlen.W))
    val mask = Input(UInt(vlen.W))
    val info = Input(new VecInfo)
  }
  val out = new Bundle {
    val vd = Output(UInt(vlen.W))
  }
}
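// Usage sketch for VFMgu (illustration only; VFMgu is not instantiated in this
// file, and the signal names below are hypothetical):
//   val vfMgu = Module(new VFMgu(VLEN))
//   vfMgu.io.in.vd    := rawResult // raw datapath result
//   vfMgu.io.in.oldVd := oldVd     // previous destination register value
//   vfMgu.io.in.mask  := v0Mask    // v0 mask register
//   vfMgu.io.in.info  := vecInfo   // eew, vl, vstart, vta/vma, etc.
//   val mergedVd = vfMgu.io.out.vd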