xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VecCommon.scala (revision 688cc4e80703c22b2cd3570804f946813c224b12)
120a5248fSzhanglinjuan/***************************************************************************************
220a5248fSzhanglinjuan  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
320a5248fSzhanglinjuan  * Copyright (c) 2020-2021 Peng Cheng Laboratory
420a5248fSzhanglinjuan  *
520a5248fSzhanglinjuan  * XiangShan is licensed under Mulan PSL v2.
620a5248fSzhanglinjuan  * You can use this software according to the terms and conditions of the Mulan PSL v2.
720a5248fSzhanglinjuan  * You may obtain a copy of Mulan PSL v2 at:
820a5248fSzhanglinjuan  *          http://license.coscl.org.cn/MulanPSL2
920a5248fSzhanglinjuan  *
1020a5248fSzhanglinjuan  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
1120a5248fSzhanglinjuan  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
1220a5248fSzhanglinjuan  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
1320a5248fSzhanglinjuan  *
1420a5248fSzhanglinjuan  * See the Mulan PSL v2 for more details.
1520a5248fSzhanglinjuan  ***************************************************************************************/
1620a5248fSzhanglinjuan
1720a5248fSzhanglinjuanpackage xiangshan.mem
1820a5248fSzhanglinjuan
1920a5248fSzhanglinjuanimport org.chipsalliance.cde.config.Parameters
2020a5248fSzhanglinjuanimport chisel3._
2120a5248fSzhanglinjuanimport chisel3.util._
2220a5248fSzhanglinjuanimport utils._
2320a5248fSzhanglinjuanimport utility._
2420a5248fSzhanglinjuanimport xiangshan._
2520a5248fSzhanglinjuanimport xiangshan.backend.rob.RobPtr
2620a5248fSzhanglinjuanimport xiangshan.backend.Bundles._
2726af847eSgood-circleimport xiangshan.backend.fu.FuType
28d73f3705SAnzoimport xiangshan.backend.fu.vector.Bundles.VEew
2920a5248fSzhanglinjuan
/**
  * Constants and bit-slicing helpers shared by the vector load/store unit (VLSU).
  */
trait VLSUConstants {
  val VLEN: Int = 128
  // used when packing unit-stride flows
  val AlignedNum: Int = 4 // 1/2/4/8
  def VLENB: Int = VLEN / 8
  def vOffsetBits: Int = log2Up(VLENB) // bit width needed to index a byte offset inside a vector reg
  lazy val vlmBindexBits: Int = 8 // placeholder, overridden by HasVLSUParameters
  lazy val vsmBindexBits: Int = 8 // placeholder, overridden by HasVLSUParameters

  def alignTypes: Int = 5 // eew/sew = 1/2/4/8 bytes; the last type marks a 128-bit element
  def alignTypeBits: Int = log2Up(alignTypes)
  def maxMUL: Int = 8
  def maxFields: Int = 8
  /**
    * Worst case is a segment indexed instruction with eew=64, emul=8, sew=8, lmul=1 and nf=8:
    * every data reg is paired with 8 index regs, and there are 8 data regs (one per field),
    * so a single instruction may be split into at most 64 uops.
    */
  def maxUopNum: Int = maxMUL * maxFields // 64
  def maxFlowNum: Int = 16
  def maxElemNum: Int = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx
  def elemIdxBits: Int = log2Up(maxElemNum) + 1 // indexes an element within an instruction
  def flowIdxBits: Int = log2Up(maxFlowNum) + 1 // indexes a flow within a uop
  def fieldBits: Int = log2Up(maxFields) + 1 // 4 bits, enough to hold 1~8

  def ewBits: Int = 3 // bit width of EEW/SEW
  def mulBits: Int = 3 // bit width of emul/lmul

  /**
    * Extract the i-th `alignBits`-wide, `alignBits`-aligned slice of `data`.
    * Elaboration fails if `data` is too narrow to contain that slice.
    */
  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    val lo = i * alignBits
    val hi = lo + alignBits - 1
    require(data.getWidth > hi)
    data(hi, lo)
  }

  /**
    * Extract an `alignBits`-wide slice of `data` starting at byte `i`
    * (the start is byte-granular, not aligned to `alignBits`).
    */
  def getNoAlignedSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    val lo = i * 8
    data(lo + alignBits - 1, lo)
  }

  // Fixed-width convenience wrappers around getSlice.
  def getByte(data: UInt, i: Int = 0): UInt = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 64)
  def getDoubleDoubleWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 128)
}
7620a5248fSzhanglinjuan
/**
  * VLSU parameters derived from the core configuration, plus data-merging helpers
  * used to assemble vector register / memory data.
  */
trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  override lazy val vlmBindexBits = log2Up(coreParams.VlMergeBufferSize)
  override lazy val vsmBindexBits = log2Up(coreParams.VsMergeBufferSize)
  lazy val maxMemByteNum = 16 // Maximum bytes for a single memory access

  /**
    * Get the low address bits used for an alignment check.
    * @param addr  Address to be checked
    * @param width Width (in bytes) the alignment is checked against
    */
  def getCheckAddrLowBits(addr: UInt, width: Int): UInt = {
    val lowBitNum = log2Up(width)
    addr(lowBitNum - 1, 0)
  }

  /** Get bit `log2Up(width)` of `in`, i.e. the bit just above the low bits selected for `width`. */
  def getOverflowBit(in: UInt, width: Int): UInt = in(log2Up(width))

  // instType decoding: bits (1, 0) select the addressing mode, bit 2 marks a segment access.
  def isUnitStride(instType: UInt): Bool = instType(1, 0) === "b00".U
  def isStrided(instType: UInt): Bool = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt): Bool = instType(0) === "b1".U
  def isNotIndexed(instType: UInt): Bool = instType(0) === "b0".U
  def isSegment(instType: UInt): Bool = instType(2) === "b1".U
  def is128Bit(alignedType: UInt): Bool = alignedType(2) === "b1".U

  /**
    * Byte-wise merge: for every mask bit that is set take the byte from `newData`,
    * otherwise keep the byte from `oldData`. Returns one 8-bit entry per mask bit.
    */
  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    val mergedBytes = mask.asBools.zipWithIndex.map { case (useNew, byteIdx) =>
      Mux(useNew, getByte(newData, byteIdx), getByte(oldData, byteIdx))
    }
    VecInit(mergedBytes)
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  /**
    * Write several elements into `oldData` at the slots named by `elemIdx`.
    *
    * For each element slot of `oldData`, the lowest element of the last valid `newData(k)`
    * whose `elemIdx(k)` points at that slot replaces the old content; slots that nobody
    * targets keep their old value. The element size is chosen by `alignedType`
    * (b00: byte, b01: half word, b10: word, b11: double word).
    */
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)

    // Merge at one fixed element size: `idxBits` low bits of each elemIdx choose the slot,
    // `slice` extracts the element of that size from a data word.
    def mergeAt(idxBits: Int, slice: (UInt, Int) => UInt): UInt = {
      // hitMatrix(slot)(k) is set when element k targets this slot
      val hitMatrix = elemIdx.map(e => UIntToOH(e(idxBits - 1, 0)).asBools).transpose
      VecInit(hitMatrix.zipWithIndex.map { case (hits, slot) =>
        ParallelPosteriorityMux(
          // the old data is the always-selected fallback placed first
          true.B +: hits.zip(valids).map { case (hit, valid) => hit && valid },
          slice(oldData, slot) +: newData.map(slice(_, 0))
        )
      }).asUInt
    }

    LookupTree(alignedType, List(
      "b00".U -> mergeAt(4, getByte(_, _)),
      "b01".U -> mergeAt(3, getHalfWord(_, _)),
      "b10".U -> mergeAt(2, getWord(_, _)),
      "b11".U -> mergeAt(1, getDoubleWord(_, _))
    ))
  }

  /** Single-element form of [[mergeDataWithElemIdx]]; the element is always treated as valid. */
  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }

  /**
    * Merge 128-bit unit-stride data byte by byte:
    * byte i comes from `newData` when mask bit i is set, otherwise from `oldData`.
    */
  object mergeDataByByte {
    def apply(oldData: UInt, newData: UInt, mask: UInt): UInt = {
      val mergedBytes = mask.asBools.zipWithIndex.map { case (useNew, byteIdx) =>
        Mux(useNew, getByte(newData, byteIdx), getByte(oldData, byteIdx))
      }
      VecInit(mergedBytes).asUInt
    }
  }

  /**
    * Merge unit-stride data into a double-width (2 * VLEN) word.
    *
    * The first entry of `data`/`mask` is always present. Entry i (i > 0) can be paired with it;
    * entry 0 alone is paired with zeros. The highest-indexed valid entry decides which pairing
    * is used, and `index` decides which half the first entry lands in:
    *   index == 0 -> first entry in the low half, partner (or zeros) in the high half
    *   otherwise  -> first entry in the high half, partner (or zeros) in the low half
    *
    * @return (merged data, merged mask)
    */
  object mergeDataByIndex {
    def apply(data: Seq[UInt], mask: Seq[UInt], index: UInt, valids: Seq[Bool]): (UInt, UInt) = {
      require(data.length == valids.length)
      require(data.length == mask.length)
      val portNum = data.length
      val selDataMatrix = Wire(Vec(portNum, Vec(2, UInt((VLEN * 2).W)))) // e.g. 3 * 2 * 256
      val selMaskMatrix = Wire(Vec(portNum, Vec(2, UInt((VLENB * 2).W)))) // e.g. 3 * 2 * 32

      if (backendParams.debugEn) {
        dontTouch(selDataMatrix)
        dontTouch(selMaskMatrix)
      }

      data.zip(mask).zipWithIndex.foreach { case ((portData, portMask), i) =>
        // entry 0 has no partner and is padded with zeros; any other entry is paired with entry 0
        val otherData = if (i == 0) 0.U(VLEN.W) else portData
        val otherMask = if (i == 0) 0.U(VLENB.W) else portMask
        selDataMatrix(i)(0) := Cat(otherData, data(0))
        selDataMatrix(i)(1) := Cat(data(0), otherData)
        selMaskMatrix(i)(0) := Cat(otherMask, mask(0))
        selMaskMatrix(i)(1) := Cat(mask(0), otherMask)
      }

      // the highest-indexed valid entry wins
      val selIdxVec = (0 until portNum).map(_.U)
      val selIdx    = PriorityMux(valids.zip(selIdxVec).reverse)

      val firstInLowHalf = index === 0.U
      val dataPair = selDataMatrix(selIdx)
      val maskPair = selMaskMatrix(selIdx)
      val selData = Mux(firstInLowHalf, dataPair(0), dataPair(1))
      val selMask = Mux(firstInLowHalf, maskPair(0), maskPair(1))
      (selData, selMask)
    }
  }

  /** Single-entry form of [[mergeDataByIndex]]; the entry is always treated as valid. */
  def mergeDataByIndex(data: UInt, mask: UInt, index: UInt): (UInt, UInt) = {
    mergeDataByIndex(Seq(data), Seq(mask), index, Seq(true.B))
  }
}
/** Base class for VLSU modules: an XSModule with the VLSU parameters and circular-queue pointer helpers mixed in. */
abstract class VLSUModule(implicit p: Parameters)
  extends XSModule with HasVLSUParameters with HasCircularQueuePtrHelper
/** Base class for VLSU bundles: an XSBundle with the VLSU parameters mixed in. */
abstract class VLSUBundle(implicit p: Parameters)
  extends XSBundle with HasVLSUParameters
21620a5248fSzhanglinjuan
/** VLSU bundle that carries the micro-op (`DynInst`) it belongs to. */
class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}
22020a5248fSzhanglinjuan
/** Vector-only part of an execution-unit output. */
class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec           = Bool()
  val vecdata         = UInt(VLEN.W)
  val mask            = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset      = UInt(vOffsetBits.W)
  val vecActive       = Bool() // 1: active vector element, 0: inactive vector element
  val is_first_ele    = Bool()
  val elemIdx         = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index within the scope of vd
  val trigger         = TriggerAction()
  val vstart          = UInt(elemIdxBits.W)
  val vecTriggerMask  = UInt(VLENB.W) // one bit per byte of a vector register
  // val uopQueuePtr = new VluopPtr
  // val flowPtr = new VlflowPtr
}
24020a5248fSzhanglinjuan
/** `MemExuOutput` extended with the vector-specific payload, the aligned type and a feedback flag. */
class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec         = new OnlyVecExuOutput
  val alignedType = UInt(alignTypeBits.W)
  // feedback
  val vecFeedback = Bool()
}
24720a5248fSzhanglinjuan
/** Per-uop state of a vector memory access, including the decoded instruction attributes. */
class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask       = UInt(VLENB.W) // one bit per flow
  val byteMask       = UInt(VLENB.W) // one bit per byte
  val data           = UInt(VLEN.W)
  // val fof            = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec   = ExceptionVec() // uop has exceptionVec
  val baseAddr       = UInt(VAddrBits.W)
  val stride         = UInt(VLEN.W)
  val flow_counter   = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum      = UInt(flowIdxBits.W) // number of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields      = UInt(fieldBits.W) // NFIELDS
  val vm           = Bool() // whether vector masking is enabled
  val usWholeReg   = Bool() // unit-stride, whole register load
  val usMaskReg    = Bool() // unit-stride, masked store/load
  val eew          = VEew() // size of memory elements
  val sew          = UInt(ewBits.W)
  val emul         = UInt(mulBits.W)
  val lmul         = UInt(mulBits.W)
  val vlmax        = UInt(elemIdxBits.W)
  val instType     = UInt(3.W)
  val vd_last_uop  = Bool()
  val vd_first_uop = Bool()
}
27520a5248fSzhanglinjuan
/** Per-flow state of a vector memory access, plus the fields used when flows are packed. */
class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr        = UInt(VAddrBits.W)
  val mask         = UInt(VLENB.W)
  val alignedType  = UInt(alignTypeBits.W)
  val vecActive    = Bool()
  val elemIdx      = UInt(elemIdxBits.W)
  val is_first_ele = Bool()

  // pack
  val isPackage         = Bool()
  val packageNum        = UInt((vOffsetBits + 1).W) // vOffsetBits == log2Up(VLENB)
  val originAlignedType = UInt(alignTypeBits.W)
}
28920a5248fSzhanglinjuan
/**
  * Memory execution-unit output wrapped with vector bookkeeping
  * (element index, merge-buffer index, byte mask, address and status flags).
  *
  * @param isVector forwarded to the wrapped `MemExuOutput`
  */
class VecMemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends VLSUBundle {
  val output            = new MemExuOutput(isVector)
  val vecFeedback       = Bool()
  val nc                = Bool()
  val mmio              = Bool()
  val usSecondInv       = Bool()
  val hasException      = Bool()
  val elemIdx           = UInt(elemIdxBits.W)
  val alignedType       = UInt(alignTypeBits.W)
  val mbIndex           = UInt(vsmBindexBits.W)
  val mask              = UInt(VLENB.W)
  val vaddr             = UInt(XLEN.W)
  val vaNeedExt         = Bool()
  val gpaddr            = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE = Bool()
  val vecTriggerMask    = UInt(VLENB.W) // one bit per byte of a vector register
}
30726af847eSgood-circle
object MulNum {
  /**
    * Decode a 3-bit emul/lmul encoding into a register count:
    * fractional multipliers (1/8, 1/4, 1/2) and 1 give 1; 2, 4 and 8 give themselves.
    *
    * @param mul emul or lmul encoding
    */
  def apply(mul: UInt): UInt = {
    val regNumTable = List(
      "b101".U -> 1.U, // 1/8
      "b110".U -> 1.U, // 1/4
      "b111".U -> 1.U, // 1/2
      "b000".U -> 1.U, // 1
      "b001".U -> 2.U, // 2
      "b010".U -> 4.U, // 4
      "b011".U -> 8.U  // 8
    )
    LookupTree(mul, regNumTable)
  }
}
/**
  * Number of bytes to write for a given emul/lmul:
  * when the multiplier is at least 1 the entire register (16 bytes) is written,
  * otherwise only the corresponding fraction of it.
  */
object MulDataSize {
  /** @param mul emul or lmul encoding */
  def apply(mul: UInt): UInt = {
    val byteNumTable = List(
      "b101".U -> 2.U,  // 1/8
      "b110".U -> 4.U,  // 1/4
      "b111".U -> 8.U,  // 1/2
      "b000".U -> 16.U, // 1
      "b001".U -> 16.U, // 2
      "b010".U -> 16.U, // 4
      "b011".U -> 16.U  // 8
    )
    LookupTree(mul, byteNumTable)
  }
}
33520a5248fSzhanglinjuan
object OneRegNum {
  /**
    * Map a 2-bit eew encoding (element size of 1/2/4/8 bytes) to 16/8/4/2 respectively.
    *
    * @param eew 2-bit eew encoding; any other width fails elaboration
    */
  def apply(eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    // entry k: encoding k (element of 2^k bytes) -> 16 >> k
    val elemNumTable = (0 until 4).map(k => k.U(2.W) -> (16 >> k).U).toList
    LookupTree(eew, elemNumTable)
  }
}
34620a5248fSzhanglinjuan
// Bytes of data read per element by indexed instructions.
object SewDataSize {
  /** Map a sew encoding b000..b011 to 1/2/4/8 bytes. */
  def apply(sew: UInt): UInt = {
    // entry k: encoding k -> 2^k bytes
    val byteNumTable = (0 until 4).map(k => k.U(3.W) -> (1 << k).U).toList
    LookupTree(sew, byteNumTable)
  }
}
35720a5248fSzhanglinjuan
// Bytes of data read per element by strided instructions.
object EewDataSize {
  /**
    * Map a 2-bit eew encoding to 1/2/4/8 bytes.
    *
    * @param eew 2-bit eew encoding; any other width fails elaboration
    */
  def apply(eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    // entry k: encoding k -> 2^k bytes
    val byteNumTable = (0 until 4).map(k => k.U(2.W) -> (1 << k).U).toList
    LookupTree(eew, byteNumTable)
  }
}
36920a5248fSzhanglinjuan
object loadDataSize {
  /**
    * Data size of a vector load, chosen by instruction type:
    * unit-stride uses the emul-based size, strided and segment unit-stride/strided
    * use the eew-based size, and every indexed form uses the sew-based size.
    */
  def apply(instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    val sizeByEmul = MulDataSize(emul)
    val sizeByEew  = EewDataSize(eew)
    val sizeBySew  = SewDataSize(sew)
    LookupTree(instType, List(
      "b000".U -> sizeByEmul, // unit-stride
      "b010".U -> sizeByEew,  // strided
      "b001".U -> sizeBySew,  // indexed-unordered
      "b011".U -> sizeBySew,  // indexed-ordered
      "b100".U -> sizeByEew,  // segment unit-stride
      "b110".U -> sizeByEew,  // segment strided
      "b101".U -> sizeBySew,  // segment indexed-unordered
      "b111".U -> sizeBySew   // segment indexed-ordered
    ))
  }
}
38320a5248fSzhanglinjuan
object storeDataSize {
  /**
    * Data size of a vector store, chosen by instruction type:
    * non-indexed forms use the eew-based size, indexed forms use the sew-based size.
    */
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val sizeByEew = EewDataSize(eew)
    val sizeBySew = SewDataSize(sew)
    LookupTree(instType, List(
      "b000".U -> sizeByEew, // unit-stride, do not use
      "b010".U -> sizeByEew, // strided
      "b001".U -> sizeBySew, // indexed-unordered
      "b011".U -> sizeBySew, // indexed-ordered
      "b100".U -> sizeByEew, // segment unit-stride
      "b110".U -> sizeByEew, // segment strided
      "b101".U -> sizeBySew, // segment indexed-unordered
      "b111".U -> sizeBySew  // segment indexed-ordered
    ))
  }
}
39720a5248fSzhanglinjuan
/**
  * EewEq8/16/32/64 pick the immediate address (index element) of an indexed instruction
  * out of a 128-bit index register. This one handles 8-bit index elements.
  */
object EewEq8 {
  /** Select byte `flow_inner_idx` (0..15) of `index`. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val elemBits = 8
    val slices = (0 until 128 / elemBits).map { k =>
      k.U -> index(elemBits * k + elemBits - 1, elemBits * k)
    }.toList
    LookupTree(flow_inner_idx, slices)
  }
}
42120a5248fSzhanglinjuan
object EewEq16 {
  /** Select the 16-bit element `flow_inner_idx` (0..7) of the 128-bit `index`. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val elemBits = 16
    val slices = (0 until 128 / elemBits).map { k =>
      k.U -> index(elemBits * k + elemBits - 1, elemBits * k)
    }.toList
    LookupTree(flow_inner_idx, slices)
  }
}
43520a5248fSzhanglinjuan
object EewEq32 {
  /** Select the 32-bit element `flow_inner_idx` (0..3) of the 128-bit `index`. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val elemBits = 32
    val slices = (0 until 128 / elemBits).map { k =>
      k.U -> index(elemBits * k + elemBits - 1, elemBits * k)
    }.toList
    LookupTree(flow_inner_idx, slices)
  }
}
44520a5248fSzhanglinjuan
object EewEq64 {
  /** Select the 64-bit element `flow_inner_idx` (0..1) of the 128-bit `index`. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val elemBits = 64
    val slices = (0 until 128 / elemBits).map { k =>
      k.U -> index(elemBits * k + elemBits - 1, elemBits * k)
    }.toList
    LookupTree(flow_inner_idx, slices)
  }
}
45320a5248fSzhanglinjuan
object IndexAddr {
  /**
    * Pick the index element `flow_inner_idx` out of `index`, with the element width
    * chosen by the 2-bit `eew` encoding (1/2/4/8 bytes).
    */
  def apply(index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    val byElemWidth = List(
      "b00".U -> EewEq8(index, flow_inner_idx),  // Imm is 1 Byte // TODO: index maybe cross register
      "b01".U -> EewEq16(index, flow_inner_idx), // Imm is 2 Byte
      "b10".U -> EewEq32(index, flow_inner_idx), // Imm is 4 Byte
      "b11".U -> EewEq64(index, flow_inner_idx)  // Imm is 8 Byte
    )
    LookupTree(eew, byElemWidth)
  }
}
46420a5248fSzhanglinjuan
object Log2Num {
  /** Table lookup of log2 for the values 16, 8, 4, 2 and 1. */
  def apply(num: UInt): UInt = {
    // entries in descending order: 2^k -> k for k = 4 .. 0
    val log2Table = (0 to 4).reverse.map(k => (1 << k).U -> k.U).toList
    LookupTree(num, log2Table)
  }
}
47520a5248fSzhanglinjuan
object GenUopIdxInField {
  /**
    * Used in normal vector instructions.
    *
    * Indexed instructions (instType bit 0 set) use the larger of lmul and emul
    * (compared as signed values) as the multiplier; all others use emul.
    * The uop index inside a field is then derived from that multiplier.
    */
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val largerMul = Mux(lmul.asSInt > emul.asSInt, lmul, emul)
    val mulInField = Mux(instType(0), largerMul, emul)
    apply(mulInField, uopIdx)
  }

  /**
    * Only used in segment instructions.
    *
    * Keeps as many low bits of `uopIdx` as the multiplier `select` spans:
    * none for multipliers up to 1, then 1, 2 or 3 bits for 2, 4 or 8.
    */
  def apply(select: UInt, uopIdx: UInt): UInt = {
    val idxByMul = List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    )
    LookupTree(select, idxByMul)
  }
}
512a5204571Szhanglinjuan
// eew decode
object EewLog2 extends VLSUConstants {
  // Earlier version, kept for reference: a lookup from a 3-bit eew encoding
  //   "b000" -> "b000" (1), "b101" -> "b001" (2), "b110" -> "b010" (4), "b111" -> "b011" (8)

  /**
    * Zero-extend the 2-bit eew encoding to `ewBits` bits.
    *
    * @param eew 2-bit eew encoding; any other width fails elaboration
    */
  def apply(eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    val extended = ZeroExt(eew, ewBits)
    extended
  }
}
52720a5248fSzhanglinjuan
object GenRealFlowNum {
  /**
   * Unit-stride instructions don't use this method.
   * Other instructions derive realFlowNum as EmulDataSize >> eew, where
   * EmulDataSize is the number of bytes that need to be written to the register and
   * eew is the number of bytes written at once.
   *
   * @param instType Instruction type, 3 bits: (isSegment, mop).
   * @param emul As the name implies.
   * @param lmul As the name implies.
   * @param eew As the name implies; must be 2 bits wide.
   * @param sew As the name implies; only its low 2 bits are used.
   * @param isSegment Only modules related to segment need to be set to true.
   * @return FlowNum of instruction.
   */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    require(eew.getWidth == 2, "The eew width must be 2.")

    val flowsByEmul = (MulDataSize(emul) >> eew).asUInt
    val flowsByLmul = (MulDataSize(lmul) >> sew(1, 0)).asUInt
    // indexed accesses follow whichever of emul/lmul is larger (signed comparison)
    val indexedFlowNum = Mux(emul.asSInt > lmul.asSInt, flowsByEmul, flowsByLmul)

    // Because the new segmentunit is needed. But the previous implementation is retained for the time being in case of emergency.
    val segmentIndexFlowNum = if (isSegment) flowsByLmul else indexedFlowNum

    LookupTree(instType, List(
      "b000".U -> flowsByEmul,         // store use, load do not use
      "b010".U -> flowsByEmul,         // strided
      "b001".U -> indexedFlowNum,      // indexed-unordered
      "b011".U -> indexedFlowNum,      // indexed-ordered
      "b100".U -> flowsByEmul,         // segment unit-stride
      "b110".U -> flowsByEmul,         // segment strided
      "b101".U -> segmentIndexFlowNum, // segment indexed-unordered
      "b111".U -> segmentIndexFlowNum  // segment indexed-ordered
    ))
  }
}
56120a5248fSzhanglinjuan
object GenRealFlowLog2 extends VLSUConstants {
  /**
   * GenRealFlowLog2 = Log2(GenRealFlowNum)
   *
   * The multiplier term is clamped: a non-negative (signed) emul/lmul contributes 0, a
   * negative one contributes its raw encoding. Two candidates are then formed:
   *   - eew-based: clamped emul + log2Up(VLENB) - eew
   *   - sew-based: clamped lmul + log2Up(VLENB) - sew(1, 0)
   * Non-indexed types use the eew-based value; indexed types use the eew-based value when
   * `emul > lmul` (signed compare) and the sew-based value otherwise.
   *
   * @param instType 3-bit instruction type, (isSegment, mop).
   * @param emul effective register-group multiplier, interpreted as signed.
   * @param lmul register-group multiplier, interpreted as signed.
   * @param eew 2-bit effective element-width encoding.
   * @param sew element-width encoding; only bits (1, 0) are used.
   * @param isSegment Only modules related to segment need to set this to true. When true,
   *                  the segment-indexed entries always use the sew-based value.
   * @return FlowNumLog2 of the instruction.
   */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, isSegment: Boolean = false): UInt = {
    require(instType.getWidth == 3, "The instType width must be 3, (isSegment, mop)")
    require(eew.getWidth == 2, "The eew width must be 2.")

    val vlenbLog2   = log2Up(VLENB).U
    val emulClamped = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulClamped = Mux(lmul.asSInt >= 0.S, 0.U, lmul)

    val eewRealFlowLog2 = emulClamped + vlenbLog2 - eew
    val sewRealFlowLog2 = lmulClamped + vlenbLog2 - sew(1, 0)
    val indexedFlowLog2 = Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)

    // The new segment unit needs the sew-based value; the previous selection is retained
    // for non-segment callers for the time being.
    val segmentIndexFlowLog2 = if (isSegment) sewRealFlowLog2 else indexedFlowLog2

    val flowLog2ByType = List(
      "b000".U -> eewRealFlowLog2,      // unit-stride
      "b010".U -> eewRealFlowLog2,      // strided
      "b001".U -> indexedFlowLog2,      // indexed-unordered
      "b011".U -> indexedFlowLog2,      // indexed-ordered
      "b100".U -> eewRealFlowLog2,      // segment unit-stride
      "b110".U -> eewRealFlowLog2,      // segment strided
      "b101".U -> segmentIndexFlowLog2, // segment indexed-unordered
      "b111".U -> segmentIndexFlowLog2  // segment indexed-ordered
    )
    LookupTree(instType, flowLog2ByType)
  }
}
59520a5248fSzhanglinjuan
/**
  * GenElemIdx generates an element index within an instruction, given a certain uopIdx and
  * a known flowIdx inside the uop.
  *
  * The log2 of the number of flows per uop is computed first (eew-based for non-indexed
  * types; for indexed types eew-based when emul > lmul as signed values, sew-based
  * otherwise). The result is uopIdx concatenated with that many low bits of flowIdx.
  * A log2 of 0 still appends flowIdx(0), for hardware misalign.
  */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
            uopIdx: UInt, flowIdx: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")

    val vlenbLog2 = log2Up(VLENB).U
    // A positive (signed) multiplier contributes nothing to the per-uop flow count.
    val emulTerm = Mux(emul.asSInt > 0.S, 0.U, emul)
    val lmulTerm = Mux(lmul.asSInt > 0.S, 0.U, lmul)
    val eewUopFlowsLog2 = emulTerm + vlenbLog2 - eew
    val sewUopFlowsLog2 = lmulTerm + vlenbLog2 - sew(1, 0)

    // instType bit 0 marks the indexed types.
    val isIndexed = instType(0).asBool
    val indexedUopFlowsLog2 = Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2)
    val uopFlowsLog2 = Mux(isIndexed, indexedUopFlowsLog2, eewUopFlowsLog2)

    // for hardware misalign: a single flow per uop still appends one flowIdx bit
    val misalignEntry = 0.U -> (uopIdx ## flowIdx(0))
    val regularEntries = (1 to 4).map { flowBits =>
      flowBits.U -> (uopIdx ## flowIdx(flowBits - 1, 0))
    }
    LookupTree(uopFlowsLog2, misalignEntry +: regularEntries)
  }
}
62120a5248fSzhanglinjuan
/**
  * GenVLMAXLog2 calculates log2(VLMAX) as lmul + log2Up(VLENB) - sew,
  * where lmul and sew are the encoded (log2) register multiplier and element width.
  */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = {
    val vlenbLog2 = log2Up(VLENB).U
    lmul + vlenbLog2 - sew
  }
}
/**
  * GenVLMAX calculates VLMAX as 1 shifted left by GenVLMAXLog2(lmul, sew).
  */
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = {
    val vlmaxLog2 = GenVLMAXLog2(lmul, sew)
    1.U << vlmaxLog2
  }
}
/**
 * Generate a mask based on vlmax: (vlmax - 1) truncated to the low `length` bits.
 * example: vlmax = b100, mask = b011
 * */
object GenVlMaxMask{
  def apply(vlmax: UInt, length: Int): UInt = {
    val vlmaxMinusOne = vlmax - 1.U
    vlmaxMinusOne(length - 1, 0)
  }
}
63820a5248fSzhanglinjuan
/**
  * Selects `nfields << (log2Up(VLENB) - eew)` for each of the four 2-bit eew encodings.
  */
object GenUSWholeRegVL extends VLSUConstants {
  /**
    * @param nfields value to be scaled (NOTE(review): presumably the number of whole
    *                registers accessed - confirm against callers).
    * @param eew 2-bit element-width encoding; any other width fails elaboration.
    * @return `nfields` shifted left by `log2Up(VLENB) - eew`.
    */
  def apply(nfields: UInt, eew: UInt): UInt = {
    require(eew.getWidth == 2, "The eew width must be 2.")
    val vlenbLog2 = log2Up(VLENB)
    // One table row per eew encoding; the shift amount is a static Int per row.
    val vlByEew = (0 until 4).map { eewCode =>
      eewCode.U(2.W) -> (nfields << (vlenbLog2 - eewCode))
    }
    LookupTree(eew, vlByEew)
  }
}
/**
  * Maps the nf encodings 0 / 1 / 3 / 7 to the emul encodings 0 / 1 / 2 / 3
  * (each `mulBits` wide). No other nf value has a table entry.
  */
object GenUSWholeEmul extends VLSUConstants{
  def apply(nf: UInt): UInt={
    val nfToEmul = Seq(0 -> 0, 1 -> 1, 3 -> 2, 7 -> 3)
    val emulTable = nfToEmul.map { case (nfCode, emulCode) =>
      nfCode.U(3.W) -> emulCode.U(mulBits.W)
    }
    LookupTree(nf, emulTable)
  }
}
66006cb2bc1Sweidingliu
66106cb2bc1Sweidingliu
/**
  * Computes ceil(vl / 8): vl shifted right by 3, plus one when any of the low three
  * bits of vl is set.
  */
object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    val wholeGroups = vl >> 3.U
    val hasRemainder = vl(2,0) =/= 0.U
    Mux(hasRemainder, wholeGroups + 1.U, wholeGroups)
  }
}
66720a5248fSzhanglinjuan
/**
  * Expands a per-flow mask by repeating every bit 2^alignedType times.
  * alignedType 0 returns the mask unchanged; encodings 1..4 repeat each bit
  * 2 / 4 / 8 / 16 times. No other alignedType value has a table entry.
  */
object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    val widenedMasks = (1 to 4).map(sizeLog2 => FillInterleaved(1 << sizeLog2, flowMask))
    val maskByType = (flowMask +: widenedMasks).zipWithIndex.map { case (byteMask, typeCode) =>
      typeCode.U(3.W) -> byteMask
    }
    LookupTree(alignedType, maskByType)
  }
}
67920a5248fSzhanglinjuan
/**
  * Derives the vd index inside a field from the uop index.
  *
  * For unit-stride / strided types (instType(1,0) is "b00" or "b10"), and whenever
  * lmul > emul as signed values, the uop index is used directly. Otherwise the uop index
  * is shifted right by (emul - lmul), for differences of 0 to 3.
  *
  * @param instType instruction type; only bits (1,0) are examined.
  * @param emul effective register-group multiplier, compared as a signed value.
  * @param lmul register-group multiplier, compared as a signed value.
  * @param uopIdx index of the uop within the instruction.
  * @return the vd index, `log2Up(maxMUL)` bits wide.
  */
object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // Unit-stride or Strided, or indexed with lmul > emul
      vdIdx := uopIdx
    }.otherwise {
      // Indexed with lmul <= emul: 2^(emul - lmul) consecutive uops share one vd index
      val multiple = emul - lmul
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
/**
* Use start and vl to generate the flow active mask.
* The window is UIntToMask(vl, VLEN) AND NOT UIntToMask(start, VLEN)
* (NOTE(review): presumably the elements in [start, vl) - confirm against UIntToMask).
* mod = true  : elementMask is ANDed with the window
* mod = false : the inverted elementMask is ANDed with the window
*/
object GenFlowMask extends VLSUConstants {
  def apply(elementMask: UInt, start: UInt, vl: UInt , mod: Boolean): UInt = {
    val notBelowStart = ~UIntToMask(start, VLEN)
    val belowVl = UIntToMask(vl, VLEN)
    val activeWindow = belowVl & notBelowStart
    // `mod` is an elaboration-time switch, so only one polarity is ever generated.
    val polarisedMask = if (mod) elementMask else (~elementMask).asUInt
    polarisedMask & activeWindow
  }
}
7180f9b302eSweiding liu
/**
  * Builds a byte mask: a run of 1 / 2 / 4 / 8 / 16 ones chosen by sizeEncode
  * ("b111" also selects 16 ones, for cbo), shifted left by addr(3, 0).
  */
object genVWmask128 {
  def apply(addr: UInt, sizeEncode: UInt): UInt = {
    val unshiftedMask = LookupTree(sizeEncode, List(
      "b000".U -> 0x1.U,    // 1 byte
      "b001".U -> 0x3.U,    // 2 bytes
      "b010".U -> 0xf.U,    // 4 bytes
      "b011".U -> 0xff.U,   // 8 bytes
      "b100".U -> 0xffff.U, // 16 bytes
      "b111".U -> 0xffff.U  // cbo
    ))
    val byteOffset = addr(3, 0)
    (unshiftedMask << byteOffset).asUInt
  }
}
/*
* Replicates the low element of `data` across 128 bits according to sizeEncode.
* Only usable when the max length is 128.
*/
object genVWdata {
  def apply(data: UInt, sizeEncode: UInt): UInt = {
    // Encodings 0..3: element of 8 / 16 / 32 / 64 bits repeated 16 / 8 / 4 / 2 times.
    val replicatedEntries = (0 until 4).map { sizeCode =>
      val elemBits = 8 << sizeCode
      sizeCode.U(3.W) -> Fill(128 / elemBits, data(elemBits - 1, 0))
    }
    // Encoding 4: the full 128 bits are passed through.
    val fullWidthEntry = "b100".U -> data(127,0)
    LookupTree(sizeEncode, replicatedEntries :+ fullWidthEntry)
  }
}
7453952421bSweiding liu
/**
  * Address of one half of a split unit-stride access.
  * index 0 selects `addr` with its low 4 bits cleared (16-byte aligned);
  * index 1 selects that aligned address plus 16.
  *
  * @param addr  original address.
  * @param index which half to produce (0 or 1).
  * @param width number of address bits taken from `addr`.
  */
object genUSSplitAddr{
  def apply(addr: UInt, index: UInt, width: Int): UInt = {
    val alignedBase = Cat(addr(width - 1, 4), 0.U(4.W))
    val alignedNext = alignedBase + 16.U
    val addrByIndex = Seq(alignedBase, alignedNext).zipWithIndex.map { case (splitAddr, sel) =>
      sel.U -> splitAddr
    }
    LookupTree(index, addrByIndex)
  }
}
7563952421bSweiding liu
/**
  * Selects one 16-bit half of a 32-bit mask: index 0 gives bits (15, 0),
  * index 1 gives bits (31, 16).
  */
object genUSSplitMask{
  def apply(mask: UInt, index: UInt): UInt = {
    require(mask.getWidth == 32) // need to be 32-bits
    val lowerHalf = mask(15, 0)
    val upperHalf = mask(31, 16)
    val halfByIndex = List(0.U -> lowerHalf, 1.U -> upperHalf)
    LookupTree(index, halfByIndex)
  }
}
7663952421bSweiding liu
/**
  * Data of one half of a split unit-stride access.
  * `data` is placed into a 256-bit value at a byte offset of addrOffset (0..15), padded
  * with zeros on both sides; index 0 returns bits (127, 0), index 1 bits (255, 128).
  * NOTE(review): the padding widths assume `data` is 128 bits wide - confirm with callers.
  */
object genUSSplitData{
  def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = {
    // One candidate per byte offset. Offset 0 is built separately so that no
    // zero-width literal is needed below the data.
    val shiftedCandidates = (0 until 16).map { byteOffset =>
      val placed =
        if (byteOffset == 0) Cat(0.U(128.W), data)
        else Cat(0.U(((16 - byteOffset) * 8).W), data, 0.U((byteOffset * 8).W))
      byteOffset.U -> placed
    }

    val tmpData = WireInit(0.U(256.W))
    tmpData := LookupTree(addrOffset, shiftedCandidates).asUInt

    val lowerHalf = tmpData(127, 0)
    val upperHalf = tmpData(255, 128)
    LookupTree(index, List(0.U -> lowerHalf, 1.U -> upperHalf))
  }
}
7853952421bSweiding liu
/**
  * Picks the element selected by elemIdx out of `data` and zero-extends it to VLEN bits.
  * alignedType chooses the element size: byte / half word / word / double word for
  * encodings 0..3; encoding 4 returns `data` unchanged.
  */
object genVSData extends VLSUConstants {
  def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = {
    // The number of elemIdx bits consumed shrinks as the element grows.
    val byteElem   = LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i)))
    val halfElem   = LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i)))
    val wordElem   = LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i)))
    val doubleElem = LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i)))

    val dataByType = List(
      "b000".U -> ZeroExt(byteElem, VLEN),
      "b001".U -> ZeroExt(halfElem, VLEN),
      "b010".U -> ZeroExt(wordElem, VLEN),
      "b011".U -> ZeroExt(doubleElem, VLEN),
      "b100".U -> data // elements wider than this are not supported
    )
    LookupTree(alignedType, dataByType)
  }
}
79761054c5cSAnzooooo
// TODO: more elegant
/**
  * Multiplies `stride` by uopIdx (0..7) using shifts and adds instead of a multiplier.
  */
object genVStride extends VLSUConstants {
  def apply(uopIdx: UInt, stride: UInt): UInt = {
    val strideX2 = (stride << 1).asUInt
    val strideX4 = (stride << 2).asUInt
    val multiples = List(
      0.U -> 0.U,
      1.U -> stride,
      2.U -> strideX2,
      3.U -> (strideX2 + stride),
      4.U -> strideX4,
      5.U -> (strideX4 + stride),
      6.U -> (strideX4 + strideX2),
      7.U -> (strideX4 + strideX2 + stride)
    )
    LookupTree(uopIdx, multiples)
  }
}
/**
 * Generate uopOffset; not used in segment instructions.
 *
 * Unit-stride types:  (uopidx >> nf) << alignedType
 * Strided types:      genVStride(uopidx >> nf, stride) << (log2Up(VLENB) - eew)
 * Indexed types:      0
 *
 * `isfof` is currently unused: the dedicated fault-only-first offset table is disabled,
 * so every instruction takes the common table.
 * */
object genVUopOffset extends VLSUConstants {
  def apply(instType: UInt, isfof: Bool, uopidx: UInt, nf: UInt, eew: UInt, stride: UInt, alignedType: UInt): UInt = {
    val uopInsidefield = (uopidx >> nf).asUInt // when nf == 0, is uopidx

    val unitStrideOffset = uopInsidefield << alignedType
    val stridedOffset    = genVStride(uopInsidefield, stride) << (log2Up(VLENB).U - eew)
    val indexedOffset    = 0.U

    val offsetByType = List(
      "b000".U -> unitStrideOffset, // unit-stride
      "b010".U -> stridedOffset,    // strided
      "b001".U -> indexedOffset,    // indexed-unordered
      "b011".U -> indexedOffset,    // indexed-ordered
      "b100".U -> unitStrideOffset, // segment unit-stride
      "b110".U -> stridedOffset,    // segment strided
      "b101".U -> indexedOffset,    // segment indexed-unordered
      "b111".U -> indexedOffset     // segment indexed-ordered
    )
    LookupTree(instType, offsetByType).asUInt
  }
}
8405dc0f712SAnzooooo
8415dc0f712SAnzooooo
8425dc0f712SAnzooooo
object genVFirstUnmask extends VLSUConstants {
  /**
   * Index of the lowest set bit of a 16-bit value, built as a priority mux over the
   * bits from position 0 upwards, with 0 as the default value.
   */
  private def lowestSetIndex(bits: UInt): UInt = {
    val candidates = bits.asBools.zipWithIndex.map { case (isSet, position) =>
      (isSet, position.U)
    }
    PriorityMuxDefault(candidates, 0.U)
  }

  /**
   * Find the lowest unmasked number of bits.
   * example:
   *   mask = 16'b1111_1111_1110_0000
   *   return 5
   * @param mask 16bits of mask.
   * @return lowest unmasked number of bits.
   */
  def apply(mask: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    lowestSetIndex(mask)
  }

  /**
   * Same search as above, performed on the mask shifted right by regOffset.
   * @param mask 16bits of mask.
   * @param regOffset number of low mask bits to discard before searching.
   * @return lowest unmasked number of bits, relative to regOffset.
   */
  def apply(mask: UInt, regOffset: UInt): UInt = {
    require(mask.getWidth == 16, "The mask width must be 16")
    val realMask = (mask >> regOffset).asUInt
    lowestSetIndex(realMask)
  }
}
8695dc0f712SAnzooooo
/**
  * Module wrapper around skidBuffer.connect: a decoupled input, a decoupled output and a
  * flush input, all handed to skidBuffer.connect.
  *
  * @param gen payload type carried by both decoupled ports.
  */
class skidBufferConnect[T <: Data](gen: T) extends Module {
  val io = IO(new Bundle {
    val in    = Flipped(DecoupledIO(gen.cloneType))
    val flush = Input(Bool())
    val out   = DecoupledIO(gen.cloneType)
  })

  // The returned data register is not needed here; only the wiring matters.
  skidBuffer.connect(in = io.in, out = io.out, flush = io.flush)
}
87908b0bc30Shappy-lx
object skidBuffer{
  /*
  * Skid Buffer used to break timing path of ready.
  *
  * Two states:
  *   empty - `in` is ready and passes straight through to `out`. If `in` fires while
  *           `out` is not ready, the payload is captured; the state moves to skid
  *           unless flush is asserted.
  *   skid  - `in` is not ready and `out` presents the captured payload as valid. The
  *           state returns to empty when `out` is ready or flush is asserted.
  *
  * Returns the register holding the captured payload.
  * */
  def connect[T <: Data](in: DecoupledIO[T], out: DecoupledIO[T], flush: Bool): T = {
    val empty :: skid :: Nil = Enum(2)
    val state      = RegInit(empty)
    val stateNext  = WireInit(empty)

    // The input fired but the consumer could not take the payload this cycle.
    val captureData = !out.ready && in.fire
    val dataBuffer  = RegEnable(in.bits, captureData)

    switch(state){
      is(empty){
        stateNext := Mux(captureData && !flush, skid, empty)
      }
      is(skid){
        stateNext := Mux(out.ready || flush, empty, skid)
      }
    }
    state := stateNext

    val holdingData = state === skid
    in.ready  := state === empty
    out.valid := in.valid || holdingData
    out.bits  := Mux(holdingData, dataBuffer, in.bits)

    dataBuffer
  }

  /*
  * Instantiates a skidBufferConnect module named `moduleName` between `in` and `out`.
  * */
  def apply[T <: Data](in: DecoupledIO[T], out: DecoupledIO[T], flush: Bool, moduleName: String): Unit = {
    val buffer = Module(new skidBufferConnect(in.bits)).suggestName(moduleName)
    buffer.io.in <> in
    buffer.io.flush := flush
    out <> buffer.io.out
  }
}
92008b0bc30Shappy-lx
921