/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.vector.Bundles.VEew

/**
  * Commonly used parameters and helper functions of the vlsu.
  *
  * Everything here is an elaboration-time (Scala) constant or a pure bit-slicing helper.
  */
trait VLSUConstants {
  val VLEN = 128
  // for pack unit-stride flow
  val AlignedNum = 4 // 1/2/4/8
  def VLENB = VLEN/8
  def vOffsetBits = log2Up(VLENB) // bits-width to index offset inside a vector reg
  lazy val vlmBindexBits = 8 // will be overridden later
  lazy val vsmBindexBits = 8 // will be overridden later

  def alignTypes = 5 // eew/sew = 1/2/4/8, last indicate 128 bit element
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
    * In the most extreme cases like a segment indexed instruction, eew=64, emul=8, sew=8, lmul=1,
    * and nf=8, each data reg is mapped with 8 index regs and there are 8 data regs in total,
    * each for a field. Therefore an instruction can be divided into 64 uops at most.
    */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index which element in an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index which flow in a uop
  def fieldBits = log2Up(maxFields) + 1 // 4-bits to indicate 1~8

  def ewBits = 3 // bits-width of EEW/SEW
  def mulBits = 3 // bits-width of emul/lmul

  /**
    * Extract the i-th `alignBits`-wide slice of `data`, counted from the LSB.
    *
    * @param data      value to slice; its width must cover slice `i`
    * @param i         slice index (slice 0 is the least significant one)
    * @param alignBits width of one slice in bits
    */
  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }

  /**
    * Extract `alignBits` bits of `data` starting at byte `i` (bit `i * 8`).
    * Unlike [[getSlice]] the start position is byte-granular and no width check is made here.
    */
  def getNoAlignedSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    data(i * 8 + alignBits - 1, i * 8)
  }

  // Fixed-width wrappers around getSlice; `i` selects the i-th element of that width.
  def getByte(data: UInt, i: Int = 0): UInt = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 64)
  def getDoubleDoubleWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 128)
}

/**
  * [[VLSUConstants]] bound to the core configuration, plus the data-merging helpers shared by
  * the vlsu modules and bundles.
  */
trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  override lazy val vlmBindexBits = log2Up(coreParams.VlMergeBufferSize)
  override lazy val vsmBindexBits = log2Up(coreParams.VsMergeBufferSize)
  lazy val maxMemByteNum = 16 // Maximum bytes for a single memory access
  /**
    * Get the low address bits used for an alignment check.
    * @param addr  Address to be checked
    * @param width Width for checking alignment; the low log2Up(width) bits are returned
    */
  def getCheckAddrLowBits(addr: UInt, width: Int): UInt = addr(log2Up(width) - 1, 0)
  /** Bit log2Up(width) of `in`, i.e. the bit directly above the range of [[getCheckAddrLowBits]]. */
  def getOverflowBit(in: UInt, width: Int): UInt = in(log2Up(width))

  // instType decoding: bits (1, 0) carry the addressing mode, bit 2 marks segment accesses.
  def isUnitStride(instType: UInt): Bool = instType(1, 0) === "b00".U
  def isStrided(instType: UInt): Bool = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt): Bool = instType(0) === "b1".U
  def isNotIndexed(instType: UInt): Bool = instType(0) === "b0".U
  def isSegment(instType: UInt): Bool = instType(2) === "b1".U
  def is128Bit(alignedType: UInt): Bool = alignedType(2) === "b1".U

  /**
    * Byte-wise merge: byte i of the result is taken from `newData` when `mask(i)` is set and
    * from `oldData` otherwise.
    *
    * @param oldData data kept where the mask bit is clear
    * @param newData data taken where the mask bit is set; same width as `oldData`
    * @param mask    one bit per byte of the data
    * @return the merged bytes, byte 0 first
    */
  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  /**
    * Write several elements into `oldData`, each at the element slot named by its `elemIdx`.
    *
    * For every element slot of the result the last valid entry of `newData` whose index hits
    * that slot is selected; if none hits, the slot keeps the value from `oldData`. Only the
    * lowest element (element 0) of each `newData` entry is used.
    *
    * @param oldData     previous register content
    * @param newData     new elements, each held in the low bits of its entry
    * @param alignedType element size selector: b00 byte, b01 half word, b10 word, b11 double word
    * @param elemIdx     destination slot of each element; only the low bits that address a slot
    *                    of the chosen size are used (16/8/4/2 slots respectively)
    * @param valids      per-element valid
    */
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    // Merge at one element size: `idxMsb` is the top bit of the slot index, `slice` extracts
    // the i-th element of that size.
    def mergeAt(idxMsb: Int, slice: (UInt, Int) => UInt): UInt =
      VecInit(elemIdx.map(e => UIntToOH(e(idxMsb, 0)).asBools).transpose.zipWithIndex.map {
        case (selVec, i) =>
          ParallelPosteriorityMux(
            // the leading always-true entry makes oldData the fallback
            true.B +: selVec.zip(valids).map { case (hit, valid) => hit && valid },
            slice(oldData, i) +: newData.map(slice(_, 0))
          )
      }).asUInt
    LookupTree(alignedType, List(
      "b00".U -> mergeAt(3, getByte(_, _)),
      "b01".U -> mergeAt(2, getHalfWord(_, _)),
      "b10".U -> mergeAt(1, getWord(_, _)),
      "b11".U -> mergeAt(0, getDoubleWord(_, _))
    ))
  }

  /** Single-element form of [[mergeDataWithElemIdx]]; the element is always valid. */
  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }

  /**
    * for merge 128-bits data of unit-stride
    *
    * Byte i of the result is byte i of `newData` when `mask(i)` is set, else byte i of `oldData`.
    * The result has one byte per mask bit.
    */
  object mergeDataByByte{
    def apply(oldData: UInt, newData: UInt, mask: UInt): UInt = {
      VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
        Mux(en, getByte(newData, i), getByte(oldData, i))
      }).asUInt
    }
  }

  /**
    * for merge Unit-Stride data to 256-bits
    * merge 128-bits data to 256-bits
    *
    * Entry 0 of `data`/`mask` belongs to the current port, the remaining entries to other ports.
    * For every entry two candidate results of twice the input width are built:
    *   - entry 0:     (0, data(0))       and (data(0), 0)
    *   - entry i > 0: (data(i), data(0)) and (data(0), data(i))
    * The highest-numbered valid entry is chosen (entry 0 if none is valid); `index === 0`
    * picks the first candidate of that entry, any other `index` the second.
    * Masks are combined in exactly the same way as data.
    *
    * @return (merged data, merged mask)
    */
  object mergeDataByIndex{
    def apply(data: Seq[UInt], mask: Seq[UInt], index: UInt, valids: Seq[Bool]): (UInt, UInt) = {
      require(data.length == valids.length)
      require(data.length == mask.length)
      val muxLength = data.length
      val selDataMatrix = Wire(Vec(muxLength, Vec(2, UInt((VLEN * 2).W)))) // 3 * 2 * 256
      val selMaskMatrix = Wire(Vec(muxLength, Vec(2, UInt((VLENB * 2).W)))) // 3 * 2 * 16

      if (backendParams.debugEn){
        dontTouch(selDataMatrix)
        dontTouch(selMaskMatrix)
      }

      for(i <- 0 until muxLength){
        if(i == 0){
          selDataMatrix(i)(0) := Cat(0.U(VLEN.W), data(i))
          selDataMatrix(i)(1) := Cat(data(i), 0.U(VLEN.W))
          selMaskMatrix(i)(0) := Cat(0.U(VLENB.W), mask(i))
          selMaskMatrix(i)(1) := Cat(mask(i), 0.U(VLENB.W))
        }
        else{
          selDataMatrix(i)(0) := Cat(data(i), data(0))
          selDataMatrix(i)(1) := Cat(data(0), data(i))
          selMaskMatrix(i)(0) := Cat(mask(i), mask(0))
          selMaskMatrix(i)(1) := Cat(mask(0), mask(i))
        }
      }
      val selIdxVec = (0 until muxLength).map(_.U)
      // reversed so that the highest valid entry has priority
      val selIdx = PriorityMux(valids.reverse, selIdxVec.reverse)

      val selData = Mux(index === 0.U,
                        selDataMatrix(selIdx)(0),
                        selDataMatrix(selIdx)(1))
      val selMask = Mux(index === 0.U,
                        selMaskMatrix(selIdx)(0),
                        selMaskMatrix(selIdx)(1))
      (selData, selMask)
    }
  }
  /** Single-port form: only the (0, data) / (data, 0) candidates exist. */
  def mergeDataByIndex(data: UInt, mask: UInt, index: UInt): (UInt, UInt) = {
    mergeDataByIndex(Seq(data), Seq(mask), index, Seq(true.B))
  }
}

/** Base class of vlsu modules: an XSModule with the vlsu parameters and circular-queue helpers. */
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper

/** Base class of vlsu bundles: an XSBundle with the vlsu parameters in scope. */
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

/** A vlsu bundle that carries the micro-op it belongs to. */
class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

/** Vector-only part of an execution-unit output. */
class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val vecActive = Bool() // 1: vector active element, 0: vector not active element
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index in scope of vd
  val trigger = TriggerAction()
  val vstart = UInt(elemIdxBits.W)
  val vecTriggerMask = UInt((VLEN/8).W)
  // val uopQueuePtr = new VluopPtr
  // val flowPtr = new VlflowPtr
}

/** MemExuOutput extended with the vector payload, the access size and a feedback flag. */
class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
  val alignedType = UInt(alignTypeBits.W)
  // feedback
  val vecFeedback = Bool()
}

/** Per-uop state of a vector memory instruction, including its decoded vtype-related fields. */
class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask = UInt(VLENB.W) // each bit for a flow
  val byteMask = UInt(VLENB.W) // each bit for a byte
  val data = UInt(VLEN.W)
  // val fof = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = VEew() // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

/** One memory flow (a single access) split out of a vector uop. */
class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr = UInt(VAddrBits.W)
  val mask = UInt(VLENB.W)
  val alignedType = UInt(alignTypeBits.W)
  val vecActive = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()

  // pack
  val isPackage = Bool()
  val packageNum = UInt((log2Up(VLENB) + 1).W)
  val originAlignedType = UInt(alignTypeBits.W)
}

/**
  * Memory execution output together with vector-specific side information.
  *
  * @param isVector forwarded unchanged to the wrapped MemExuOutput
  */
class VecMemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends VLSUBundle{
  val output = new MemExuOutput(isVector)
  val vecFeedback = Bool()
  val nc = Bool()
  val mmio = Bool()
  val usSecondInv = Bool()
  val hasException = Bool()
  val elemIdx = UInt(elemIdxBits.W)
  val alignedType = UInt(alignTypeBits.W)
  val mbIndex = UInt(vsmBindexBits.W)
  val mask = UInt(VLENB.W)
  val vaddr = UInt(XLEN.W)
  val vaNeedExt = Bool()
  val gpaddr = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE = Bool()
  val vecTriggerMask = UInt((VLEN/8).W)
}

/** Number of registers covered by a 3-bit emul/lmul encoding; fractional values count as 1. */
object MulNum {
  def apply (mul: UInt): UInt = { //mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 1.U , // 1/8
      "b110".U -> 1.U , // 1/4
      "b111".U -> 1.U , // 1/2
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}
/**
  * when emul is greater than or equal to 1, this means the entire register needs to be written;
  * otherwise, only write the specified number of bytes */
object MulDataSize {
  def apply (mul: UInt): UInt = { //mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 2.U  , // 1/8
      "b110".U -> 4.U  , // 1/4
      "b111".U -> 8.U  , // 1/2
      "b000".U -> 16.U , // 1
      "b001".U -> 16.U , // 2
      "b010".U -> 16.U , // 4
      "b011".U -> 16.U   // 8
    )))}
}

/** Number of elements of the given eew (2-bit log2 of the byte count) in one 16-byte register. */
object OneRegNum {
  def apply (eew: UInt): UInt = { // eew is the 2-bit encoded element width
    require(eew.getWidth == 2, "The eew width must be 2.")
    (LookupTree(eew, List(
      "b00".U -> 16.U , // 1
      "b01".U -> 8.U  , // 2
      "b10".U -> 4.U  , // 4
      "b11".U -> 2.U    // 8
    )))}
}

//index inst read data byte
object SewDataSize {
  def apply (sew: UInt): UInt = {
    (LookupTree(sew,List(
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}

// strided inst read data byte
object EewDataSize {
  def apply (eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    (LookupTree(eew, List(
      "b00".U -> 1.U , // 1
      "b01".U -> 2.U , // 2
      "b10".U -> 4.U , // 4
      "b11".U -> 8.U   // 8
    )))}
}

/** Bytes read by one load access, selected by instType. */
object loadDataSize {
  def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

/** Bytes written by one store access, selected by instType. */
object storeDataSize {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U -> EewDataSize(eew) , // unit-stride, do not use
      "b010".U -> EewDataSize(eew) , // strided
      "b001".U -> SewDataSize(sew) , // indexed-unordered
      "b011".U -> SewDataSize(sew) , // indexed-ordered
      "b100".U -> EewDataSize(eew) , // segment unit-stride
      "b110".U -> EewDataSize(eew) , // segment strided
      "b101".U -> SewDataSize(sew) , // segment indexed-unordered
      "b111".U -> SewDataSize(sew)   // segment indexed-ordered
    )))}
}

/**
  * these are used to obtain immediate addresses for index instruction
  *
  * EewEqN(index, k) returns the k-th N-bit element of the 128-bit `index` register.
  */
object EewEq8 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    // 16 byte lanes: k -> index(8k + 7, 8k)
    LookupTree(flow_inner_idx, (0 until 16).map { k =>
      k.U -> index(8 * k + 7, 8 * k)
    }.toList)
  }
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    // 8 half-word lanes: k -> index(16k + 15, 16k)
    LookupTree(flow_inner_idx, (0 until 8).map { k =>
      k.U -> index(16 * k + 15, 16 * k)
    }.toList)
  }
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    // 4 word lanes: k -> index(32k + 31, 32k)
    LookupTree(flow_inner_idx, (0 until 4).map { k =>
      k.U -> index(32 * k + 31, 32 * k)
    }.toList)
  }
}

object EewEq64 {
  def apply (index: UInt, flow_inner_idx: UInt): UInt = {
    // 2 double-word lanes: k -> index(64k + 63, 64k)
    LookupTree(flow_inner_idx, (0 until 2).map { k =>
      k.U -> index(64 * k + 63, 64 * k)
    }.toList)
  }
}

/** Select the `flow_inner_idx`-th index element of width given by the 2-bit `eew`. */
object IndexAddr {
  def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    (LookupTree(eew, List(
      "b00".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx ), // Imm is 1 Byte // TODO: index maybe cross register
      "b01".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx ), // Imm is 2 Byte
      "b10".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx ), // Imm is 4 Byte
      "b11".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx )  // Imm is 8 Byte
    )))}
}

/** log2 of a power-of-two value in 1..16, by table lookup. */
object Log2Num {
  def apply (num: UInt): UInt = {
    (LookupTree(num,List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

object GenUopIdxInField {
  /**
    * Used in normal vector instruction
    *
    * For indexed accesses (instType bit 0 set) the larger of lmul and emul, compared as signed
    * values, is used as the selector; otherwise emul is used.
    * */
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    apply(mulInField, uopIdx)
  }
  /**
    * Only used in segment instruction.
    *
    * Keeps the low log2(mul) bits of `uopIdx` for mul = 2/4/8 and yields 0 for mul <= 1,
    * where `select` is the 3-bit mul encoding.
    * */
  def apply (select: UInt, uopIdx: UInt): UInt = {
    LookupTree(select, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}

//eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  /** Zero-extend the 2-bit eew encoding to `ewBits` bits. */
  def apply(eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    ZeroExt(eew, ewBits)
  }
}

object GenRealFlowNum {
  /**
   * unit-stride instructions don't use this method;
   * other instructions generate realFlowNum by EmulDataSize >> eew,
   * EmulDataSize means the number of bytes that need to be written to the register,
   * eew means the number of bytes written at once.
   *
   * @param instType As the name implies.
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies.
   * @param sew As the name implies.
   * @param isSegment Only modules related to segment need to be set to true.
   * @return FlowNum of instruction.
   *
   */
  def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    require(eew.getWidth == 2, "The eew width must be 2.")
    // flows when the access is sized by (emul, eew) resp. by (lmul, sew)
    val emulFlowNum = (MulDataSize(emul) >> eew).asUInt
    val lmulFlowNum = (MulDataSize(lmul) >> sew(1,0)).asUInt
    // indexed accesses follow whichever of emul/lmul is larger (signed compare)
    val indexedFlowNum = Mux(emul.asSInt > lmul.asSInt, emulFlowNum, lmulFlowNum)
    // Because the new segmentunit is needed. But the previous implementation is retained for the time being in case of emergency.
    val segmentIndexFlowNum = if (isSegment) lmulFlowNum else indexedFlowNum
    (LookupTree(instType,List(
      "b000".U -> emulFlowNum,         // store use, load do not use
      "b010".U -> emulFlowNum,         // strided
      "b001".U -> indexedFlowNum,      // indexed-unordered
      "b011".U -> indexedFlowNum,      // indexed-ordered
      "b100".U -> emulFlowNum,         // segment unit-stride
      "b110".U -> emulFlowNum,         // segment strided
      "b101".U -> segmentIndexFlowNum, // segment indexed-unordered
      "b111".U -> segmentIndexFlowNum  // segment indexed-ordered
    )))}
}

object GenRealFlowLog2 extends VLSUConstants {
  /**
   * GenRealFlowLog2 = Log2(GenRealFlowNum)
   *
   * @param instType As the name implies.
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies.
   * @param sew As the name implies.
   * @param isSegment Only modules related to segment need to be set to true.
   * @return FlowNumLog2 of instruction.
   */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    require(eew.getWidth == 2, "The eew width must be 2.")
    // non-negative (signed) mul encodings contribute 0, negative ones are added as-is
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    val indexedFlowLog2 = Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)
    // Because the new segmentunit is needed. But the previous implementation is retained for the time being in case of emergency.
    val segmentIndexFlowLog2 = if (isSegment) sewRealFlowLog2 else indexedFlowLog2
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2,      // unit-stride
      "b010".U -> eewRealFlowLog2,      // strided
      "b001".U -> indexedFlowLog2,      // indexed-unordered
      "b011".U -> indexedFlowLog2,      // indexed-ordered
      "b100".U -> eewRealFlowLog2,      // segment unit-stride
      "b110".U -> eewRealFlowLog2,      // segment strided
      "b101".U -> segmentIndexFlowLog2, // segment indexed-unordered
      "b111".U -> segmentIndexFlowLog2  // segment indexed-ordered
    )))
  }
}

/**
  * GenElemIdx generates an element index within an instruction, given a certain uopIdx and a known flowIdx
  * inside the uop.
  *
  * The result is `uopIdx` concatenated with the low bits of `flowIdx`; the number of flowIdx
  * bits kept is the log2 of the flows per uop (at least one bit is always kept).
  */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
    uopIdx: UInt, flowIdx: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    val isIndexed = instType(0).asBool
    val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew
    val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0)
    val uopFlowsLog2 = Mux(
      isIndexed,
      Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2),
      eewUopFlowsLog2
    )
    LookupTree(uopFlowsLog2, List(
      0.U -> uopIdx ## flowIdx(0), // for hardware misalign
      1.U -> uopIdx ## flowIdx(0),
      2.U -> uopIdx ## flowIdx(1, 0),
      3.U -> uopIdx ## flowIdx(2, 0),
      4.U -> uopIdx ## flowIdx(3, 0)
    ))
  }
}

/**
  * GenVLMAXLog2 computes lmul + log2Up(VLENB) - sew; GenVLMAX is 1 shifted left by that amount.
  * NOTE(review): this reads as VLMAX = LMUL * VLENB / (element bytes) with lmul and sew given
  * as log2 encodings -- confirm against the callers.
  */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}
/**
  * generate mask base on vlmax
  * example: vlmax = b100, max = b011
  * */
object GenVlMaxMask{
6360869ae56Sweiding liu def apply(vlmax: UInt, length: Int): UInt = (vlmax - 1.U)(length-1, 0) 6370869ae56Sweiding liu} 63820a5248fSzhanglinjuan 63920a5248fSzhanglinjuanobject GenUSWholeRegVL extends VLSUConstants { 64020a5248fSzhanglinjuan def apply(nfields: UInt, eew: UInt): UInt = { 641d73f3705SAnzo require(eew.getWidth == 2, "The eew width must be 2.") 642d73f3705SAnzo LookupTree(eew, List( 64320a5248fSzhanglinjuan "b00".U -> (nfields << (log2Up(VLENB) - 0)), 64420a5248fSzhanglinjuan "b01".U -> (nfields << (log2Up(VLENB) - 1)), 64520a5248fSzhanglinjuan "b10".U -> (nfields << (log2Up(VLENB) - 2)), 64620a5248fSzhanglinjuan "b11".U -> (nfields << (log2Up(VLENB) - 3)) 64720a5248fSzhanglinjuan )) 64820a5248fSzhanglinjuan } 64920a5248fSzhanglinjuan} 65006cb2bc1Sweidingliuobject GenUSWholeEmul extends VLSUConstants{ 65106cb2bc1Sweidingliu def apply(nf: UInt): UInt={ 65206cb2bc1Sweidingliu LookupTree(nf,List( 65306cb2bc1Sweidingliu "b000".U -> "b000".U(mulBits.W), 65406cb2bc1Sweidingliu "b001".U -> "b001".U(mulBits.W), 65506cb2bc1Sweidingliu "b011".U -> "b010".U(mulBits.W), 65606cb2bc1Sweidingliu "b111".U -> "b011".U(mulBits.W) 65706cb2bc1Sweidingliu )) 65806cb2bc1Sweidingliu } 65906cb2bc1Sweidingliu} 66006cb2bc1Sweidingliu 66106cb2bc1Sweidingliu 66220a5248fSzhanglinjuanobject GenUSMaskRegVL extends VLSUConstants { 66320a5248fSzhanglinjuan def apply(vl: UInt): UInt = { 664115faeaaSweiding liu Mux(vl(2,0) === 0.U , (vl >> 3.U), ((vl >> 3.U) + 1.U)) 66520a5248fSzhanglinjuan } 66620a5248fSzhanglinjuan} 66720a5248fSzhanglinjuan 66820a5248fSzhanglinjuanobject GenUopByteMask { 66920a5248fSzhanglinjuan def apply(flowMask: UInt, alignedType: UInt): UInt = { 67020a5248fSzhanglinjuan LookupTree(alignedType, List( 67100e6f2e2Sweiding liu "b000".U -> flowMask, 67200e6f2e2Sweiding liu "b001".U -> FillInterleaved(2, flowMask), 67300e6f2e2Sweiding liu "b010".U -> FillInterleaved(4, flowMask), 67400e6f2e2Sweiding liu "b011".U -> FillInterleaved(8, flowMask), 67500e6f2e2Sweiding liu 
"b100".U -> FillInterleaved(16, flowMask) 67620a5248fSzhanglinjuan )) 67720a5248fSzhanglinjuan } 67820a5248fSzhanglinjuan} 67920a5248fSzhanglinjuan 680a5204571Szhanglinjuanobject GenVdIdxInField extends VLSUConstants { 6812838e2b9Szhanglinjuan def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = { 6822838e2b9Szhanglinjuan val vdIdx = Wire(UInt(log2Up(maxMUL).W)) 6832838e2b9Szhanglinjuan when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) { 6842838e2b9Szhanglinjuan // Unit-stride or Strided, or indexed with lmul >= emul 6852838e2b9Szhanglinjuan vdIdx := uopIdx 6862838e2b9Szhanglinjuan }.otherwise { 6872838e2b9Szhanglinjuan // Indexed with lmul <= emul 6882838e2b9Szhanglinjuan val multiple = emul - lmul 6892838e2b9Szhanglinjuan val uopIdxWidth = uopIdx.getWidth 6902838e2b9Szhanglinjuan vdIdx := LookupTree(multiple, List( 6912838e2b9Szhanglinjuan 0.U -> uopIdx, 692a5204571Szhanglinjuan 1.U -> (uopIdx >> 1), 693a5204571Szhanglinjuan 2.U -> (uopIdx >> 2), 694a5204571Szhanglinjuan 3.U -> (uopIdx >> 3) 6952838e2b9Szhanglinjuan )) 6962838e2b9Szhanglinjuan } 6972838e2b9Szhanglinjuan vdIdx 69820a5248fSzhanglinjuan } 69920a5248fSzhanglinjuan} 7000f9b302eSweiding liu/** 7010f9b302eSweiding liu* Use start and vl to generate flow activative mask 7020f9b302eSweiding liu* mod = true fill 0 7030f9b302eSweiding liu* mod = false fill 1 7040f9b302eSweiding liu*/ 7050f9b302eSweiding liuobject GenFlowMask extends VLSUConstants { 7060f9b302eSweiding liu def apply(elementMask: UInt, start: UInt, vl: UInt , mod: Boolean): UInt = { 7070f9b302eSweiding liu val startMask = ~UIntToMask(start, VLEN) 7080f9b302eSweiding liu val vlMask = UIntToMask(vl, VLEN) 7090f9b302eSweiding liu val maskVlStart = vlMask & startMask 7100f9b302eSweiding liu if(mod){ 7110f9b302eSweiding liu elementMask & maskVlStart 7120f9b302eSweiding liu } 7130f9b302eSweiding liu else{ 7140f9b302eSweiding liu (~elementMask).asUInt & maskVlStart 7150f9b302eSweiding liu } 
7160f9b302eSweiding liu } 7170f9b302eSweiding liu} 7180f9b302eSweiding liu 71900e6f2e2Sweiding liuobject genVWmask128 { 72000e6f2e2Sweiding liu def apply(addr: UInt, sizeEncode: UInt): UInt = { 72100e6f2e2Sweiding liu (LookupTree(sizeEncode, List( 72200e6f2e2Sweiding liu "b000".U -> 0x1.U, //0001 << addr(2:0) 72300e6f2e2Sweiding liu "b001".U -> 0x3.U, //0011 72400e6f2e2Sweiding liu "b010".U -> 0xf.U, //1111 72500e6f2e2Sweiding liu "b011".U -> 0xff.U, //11111111 7263c808de0SAnzo "b100".U -> 0xffff.U, //1111111111111111 7273c808de0SAnzo "b111".U -> 0xffff.U //cbo 72800e6f2e2Sweiding liu )) << addr(3, 0)).asUInt 72900e6f2e2Sweiding liu } 73000e6f2e2Sweiding liu} 73100e6f2e2Sweiding liu/* 73200e6f2e2Sweiding liu* only use in max length is 128 73300e6f2e2Sweiding liu*/ 73400e6f2e2Sweiding liuobject genVWdata { 73500e6f2e2Sweiding liu def apply(data: UInt, sizeEncode: UInt): UInt = { 73600e6f2e2Sweiding liu LookupTree(sizeEncode, List( 73700e6f2e2Sweiding liu "b000".U -> Fill(16, data(7, 0)), 73800e6f2e2Sweiding liu "b001".U -> Fill(8, data(15, 0)), 73900e6f2e2Sweiding liu "b010".U -> Fill(4, data(31, 0)), 74000e6f2e2Sweiding liu "b011".U -> Fill(2, data(63,0)), 74100e6f2e2Sweiding liu "b100".U -> data(127,0) 74200e6f2e2Sweiding liu )) 74300e6f2e2Sweiding liu } 74400e6f2e2Sweiding liu} 7453952421bSweiding liu 7463952421bSweiding liuobject genUSSplitAddr{ 747b5287751SAnzo def apply(addr: UInt, index: UInt, width: Int): UInt = { 748b5287751SAnzo val tmpAddr = Cat(addr(width - 1, 4), 0.U(4.W)) 7493952421bSweiding liu val nextCacheline = tmpAddr + 16.U 7503952421bSweiding liu LookupTree(index, List( 7513952421bSweiding liu 0.U -> tmpAddr, 7523952421bSweiding liu 1.U -> nextCacheline 7533952421bSweiding liu )) 7543952421bSweiding liu } 7553952421bSweiding liu} 7563952421bSweiding liu 7573952421bSweiding liuobject genUSSplitMask{ 758c8d442a6Sweiding liu def apply(mask: UInt, index: UInt): UInt = { 759c8d442a6Sweiding liu require(mask.getWidth == 32) // need to be 32-bits 
7603952421bSweiding liu LookupTree(index, List( 761c8d442a6Sweiding liu 0.U -> mask(15, 0), 762c8d442a6Sweiding liu 1.U -> mask(31, 16), 7633952421bSweiding liu )) 7643952421bSweiding liu } 7653952421bSweiding liu} 7663952421bSweiding liu 7673952421bSweiding liuobject genUSSplitData{ 7683952421bSweiding liu def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = { 7693952421bSweiding liu val tmpData = WireInit(0.U(256.W)) 7703952421bSweiding liu val lookupTable = (0 until 16).map{case i => 7713952421bSweiding liu if(i == 0){ 7723952421bSweiding liu i.U -> Cat(0.U(128.W), data) 7733952421bSweiding liu }else{ 7743952421bSweiding liu i.U -> Cat(0.U(((16-i)*8).W), data, 0.U((i*8).W)) 7753952421bSweiding liu } 7763952421bSweiding liu } 7773952421bSweiding liu tmpData := LookupTree(addrOffset, lookupTable).asUInt 7783952421bSweiding liu 7793952421bSweiding liu LookupTree(index, List( 7803952421bSweiding liu 0.U -> tmpData(127, 0), 7813952421bSweiding liu 1.U -> tmpData(255, 128) 7823952421bSweiding liu )) 7833952421bSweiding liu } 7843952421bSweiding liu} 7853952421bSweiding liu 78608047a41SAnzoooooobject genVSData extends VLSUConstants { 7873952421bSweiding liu def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = { 7883952421bSweiding liu LookupTree(alignedType, List( 7893952421bSweiding liu "b000".U -> ZeroExt(LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i))), VLEN), 7903952421bSweiding liu "b001".U -> ZeroExt(LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i))), VLEN), 7913952421bSweiding liu "b010".U -> ZeroExt(LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i))), VLEN), 7923952421bSweiding liu "b011".U -> ZeroExt(LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i))), VLEN), 7933952421bSweiding liu "b100".U -> data // if have wider element, it will broken 7943952421bSweiding liu )) 7953952421bSweiding liu } 7963952421bSweiding liu} 
/**
 * Computes uopIdx * stride for uopIdx in 0 to 7 using shifts and adds.
 */
// TODO: more elegant
object genVStride extends VLSUConstants {
  def apply(uopIdx: UInt, stride: UInt): UInt = {
    val strideX2 = stride << 1
    val strideX4 = stride << 2
    LookupTree(uopIdx, List(
      0.U -> 0.U,
      1.U -> stride,
      2.U -> strideX2,
      3.U -> (strideX2.asUInt + stride),
      4.U -> strideX4,
      5.U -> (strideX4.asUInt + stride),
      6.U -> (strideX4.asUInt + strideX2),
      7.U -> (strideX4.asUInt + strideX2 + stride)
    ))
  }
}

/**
 * generate uopOffset, not used in segment instruction
 *
 * With uopInsidefield = uopidx >> nf:
 *   - unit-stride (and segment unit-stride): uopInsidefield << alignedType
 *   - strided (and segment strided): genVStride(uopInsidefield, stride) << (log2Up(VLENB) - eew)
 *   - indexed: 0
 *
 * `isfof` is currently unused: the dedicated fault-only-first offset below is disabled.
 */
object genVUopOffset extends VLSUConstants {
  def apply(
    instType: UInt,
    isfof: Bool,
    uopidx: UInt,
    nf: UInt,
    eew: UInt,
    stride: UInt,
    alignedType: UInt
  ): UInt = {
    val uopInsidefield = (uopidx >> nf).asUInt // when nf == 0, is uopidx

//    val fofVUopOffset = (LookupTree(instType,List(
//      "b000".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew)   ) , // unit-stride fof
//      "b100".U -> ( genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew)   ) , // segment unit-stride fof
//    ))).asUInt

    val unitStrideOffset = uopInsidefield << alignedType
    val stridedOffset = genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew)
    val indexedOffset = 0.U

    val otherVUopOffset = LookupTree(instType, List(
      "b000".U -> unitStrideOffset, // unit-stride
      "b010".U -> stridedOffset,    // strided
      "b001".U -> indexedOffset,    // indexed-unordered
      "b011".U -> indexedOffset,    // indexed-ordered
      "b100".U -> unitStrideOffset, // segment unit-stride
      "b110".U -> stridedOffset,    // segment strided
      "b101".U -> indexedOffset,    // segment indexed-unordered
      "b111".U -> indexedOffset     // segment indexed-ordered
    )).asUInt

//    Mux(isfof, fofVUopOffset, otherVUopOffset)
    otherVUopOffset
  }
}



object genVFirstUnmask extends VLSUConstants {
  /**
   * Builds the priority select: the index of the lowest set bit, or 0 when no bit is set.
   */
  private def lowestSetIndex(bits: UInt): UInt = {
    val select = bits.asBools.zipWithIndex.map { case (bit, idx) => (bit, idx.U) }
    PriorityMuxDefault(select, 0.U)
  }

  /**
   * Find the index of the lowest set bit of the mask.
   * example:
   *   mask = 16'b1111_1111_1110_0000
   *   return 5
   * @param mask 16bits of mask.
   * @return index of the lowest set bit, 0 when the mask is all zero.
   */
  def apply(mask: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    lowestSetIndex(mask)
  }

  /**
   * Same as above, applied to the mask shifted right by regOffset.
   * @param mask 16bits of mask.
   * @param regOffset number of low mask bits to drop before searching.
   * @return index (relative to regOffset) of the lowest set bit, 0 when none is set.
   */
  def apply(mask: UInt, regOffset: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    val realMask = (mask >> regOffset).asUInt
    lowestSetIndex(realMask)
  }
}

/**
 * Module wrapper around skidBuffer.connect, so that the buffer gets its own module boundary.
 */
class skidBufferConnect[T <: Data](gen: T) extends Module {
  val io = IO(new Bundle() {
    val in = Flipped(DecoupledIO(gen.cloneType))
    val flush = Input(Bool())
    val out = DecoupledIO(gen.cloneType)
  })

  skidBuffer.connect(io.in, io.out, io.flush)
}

object skidBuffer {
  /*
   * Skid Buffer used to break timing path of ready
   *
   * in.ready depends only on the state register (ready while empty).
   * When in fires while out is not ready (and no flush), in.bits is captured and the state becomes skid;
   * in the skid state the captured data is driven on out until out.ready or flush is seen.
   * Returns the data buffer register.
   * */
  def connect[T <: Data](
    in: DecoupledIO[T],
    out: DecoupledIO[T],
    flush: Bool
  ): T = {
    val empty :: skid :: Nil = Enum(2)
    val state = RegInit(empty)
    val stateNext = WireInit(empty)
    // Capture the incoming beat only when it is accepted but cannot be passed on.
    val stall = !out.ready && in.fire
    val dataBuffer = RegEnable(in.bits, stall)

    when (state === empty) {
      stateNext := Mux(stall && !flush, skid, empty)
    }.elsewhen (state === skid) {
      stateNext := Mux(out.ready || flush, empty, skid)
    }
    state := stateNext

    in.ready := state === empty
    out.bits := Mux(state === skid, dataBuffer, in.bits)
    out.valid := in.valid || (state === skid)

    dataBuffer
  }

  /*
   * Instantiates a skidBufferConnect module named `moduleName` between in and out.
   * */
  def apply[T <: Data](
    in: DecoupledIO[T],
    out: DecoupledIO[T],
    flush: Bool,
    moduleName: String
  ): Unit = {
    val buffer = Module(new skidBufferConnect(in.bits))
    buffer.suggestName(moduleName)
    buffer.io.in <> in
    buffer.io.flush := flush
    out <> buffer.io.out
  }
}