/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.{FuType, PMPRespBundle}
import freechips.rocketchip.diplomacy.BufferParams
import xiangshan.cache.mmu._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.vector.Bundles.{VConfig, VType}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.fu.NewCSR._
import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec

class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
{
  val baseVaddr        = UInt(XLEN.W)
  val uop              = new DynInst
  val paddr            = UInt(PAddrBits.W)
  val mask             = UInt(VLEN.W)
  val alignedType      = UInt(alignTypeBits.W)
  val vl               = UInt(elemIdxBits.W)
  val uopFlowNum       = UInt(elemIdxBits.W)
  val uopFlowNumMask   = UInt(elemIdxBits.W)
  val isVSegLoad       = Bool()
  val isVSegStore      = Bool()
  // for exception
  val vstart           = UInt(elemIdxBits.W)
  val exceptionVaddr   = UInt(XLEN.W)
  val exceptionGpaddr  = UInt(XLEN.W)
  val exceptionIsForVSnonLeafPTE = Bool()
  val exception_va     = Bool()
  val exception_gpa    = Bool()
  val exception_pa     = Bool()
  val exceptionVstart  = UInt(elemIdxBits.W)
  // valid: a fof exception occurred but cannot be raised; the vl of every written-back uop must be updated with exceptionVl
  val exceptionVl      = ValidIO(UInt(elemIdxBits.W))
  val isFof            = Bool()
}

// latch each uop's VecWen, pdest, v0Wen, uopIdx
class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
  val uop              = new DynInst
}

class VSegmentUnit (implicit p: Parameters) extends VLSUModule
  with HasDCacheParameters
  with MemoryOpConstants
  with SdtrigExt
  with HasLoadHelper
{
  val io               = IO(new VSegmentUnitIO)

  val maxSize          = VSegmentBufferSize

  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
  }

  object VSegUPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
      val ptr           = Wire(new VSegUPtr)
      ptr.flag         := f
      ptr.value        := v
      ptr
    }
  }

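  // A misaligned element that crosses a 16-byte boundary is handled by splitting it into at
  // most two accesses (see curPtr / isFirstSplit / isSecondSplit below).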
  val maxSplitNum = 2

  /**
  ********************************************************************************************************
  *  Use an example to illustrate the working logic of a segment unit:                                   *
  *    For:                                                                                              *
  *      lmul=2 sew=32 emul=2 eew=32  vl=8                                                               *
  *    Then:                                                                                             *
  *      Access memory in the order:                                                                     *
  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
  *                                                                                                      *
  *                                                                                                      *
  *    [[data]] saves the data generated by the access and corresponds to the register.                  *
  *    [[splitPtr]] controls the destination register written to.                                        *
  *                                                                                                      *
  *    The splitPtr offset can be seen in the assignment logic of [[splitPtrNext]],                      *
  *    which is mainly calculated from [[fieldIdx]] and [[segmentIdx]].                                  *
  *    Different fields of the same segment are accessed first, then the next segment is visited.        *
  *    For the case of 'emul' greater than 1, as in the following example,                               *
  *    although 'v2' and 'v3' are different vd registers holding the same field, they hold different     *
  *    segments, so they are accessed sequentially, just like 'Access memory in the order' above.        *
  *                                                                                                      *
  *                         [[segmentIdx]]                                                               *
  *                               |                                                                      *
  *                               |                                                                      *
  *                               V                                                                      *
  *                                                                                                      *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v3  |     field0     |      field0     |      field0     |      field0         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *                  v4  |     field1     |      field1     |      field1     |      field1         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v5  |     field1     |      field1     |      field1     |      field1         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *                  v6  |     field2     |      field2     |      field2     |      field2         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v7  |     field2     |      field2     |      field2     |      field2         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *                  v8  |     field3     |      field3     |      field3     |      field3         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v9  |     field3     |      field3     |      field3     |      field3         |    *
  *                      ----------------------------------------------------------------------------    *
  *                                                                                                      *
  *                                                                                                      *
  ********************************************************************************************************
  **/

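  // Illustrative software model only (not part of the generated hardware): for the example
  // above (base vd = v2, emul = 2, nf = 4 fields, 8 segments, 4 elements per register), this
  // helper reproduces the "(vd, segment)" visit order that the hardware realises with
  // [[splitPtr]]/[[splitPtrNext]]. The name and parameters are local to this sketch.
  def exampleAccessOrder(baseVd: Int, emul: Int, nFields: Int, nSegments: Int, elemsPerReg: Int): Seq[(Int, Int)] =
    for {
      seg   <- 0 until nSegments   // segments are visited in order
      field <- 0 until nFields     // all fields of a segment before moving to the next segment
    } yield (baseVd + field * emul + seg / elemsPerReg, seg) // (destination vreg, segment index)
  // exampleAccessOrder(2, 2, 4, 8, 4) yields (2,0),(4,0),(6,0),(8,0),(2,1),... matching the order above.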

  // buffer uop
  val instMicroOp       = Reg(new VSegmentBundle)
  val instMicroOpValid  = RegInit(false.B)
  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for select stride/index

  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
  val fieldIdx          = RegInit(0.U(fieldBits.W))
  val segmentOffset     = RegInit(0.U(XLEN.W))
  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for select load/store data
  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))

  val exception_va      = WireInit(false.B)
  val exception_gpa     = WireInit(false.B)
  val exception_pa      = WireInit(false.B)

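  // vl counts segments, so maxSegIdx is the index of the last segment; vpu.nf encodes
  // (number of fields - 1), so maxNfields is the index of the last field.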
  val maxSegIdx         = instMicroOp.vl - 1.U
  val maxNfields        = instMicroOp.uop.vpu.nf
  val latchVaddr        = RegInit(0.U(VAddrBits.W))

  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
  XSError((fieldIdx > maxNfields) && instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")

  // MicroOp
  val baseVaddr                       = instMicroOp.baseVaddr
  val alignedType                     = instMicroOp.alignedType
  val fuType                          = instMicroOp.uop.fuType
  val isVSegLoad                      = instMicroOp.isVSegLoad
  val isVSegStore                     = instMicroOp.isVSegStore
  val mask                            = instMicroOp.mask
  val exceptionVec                    = instMicroOp.uop.exceptionVec
  val issueEew                        = instMicroOp.uop.vpu.veew
  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
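  // EMUL = (EEW / SEW) * LMUL, evaluated here in log2 form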
  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always segment instruction
  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // max element number log2 in vd
  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in vd
  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements in one index register
  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
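  // whether the current segment is active (not masked off)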
  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR

  // sbuffer write interface
  val sbufferOut                      = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))


  // segment fof instruction buffer
  val fofBuffer                       = RegInit(0.U.asTypeOf(new DynInst))
  val fofBufferValid                  = RegInit(false.B)


  // Segment instruction's FSM
  /*
  * s_idle: wait request
  * s_flush_sbuffer_req: flush sbuffer
  * s_wait_flush_sbuffer_resp: wait sbuffer empty
  * s_tlb_req: request tlb
  * s_wait_tlb_resp: wait tlb resp
  * s_pm: check pmp
  * s_cache_req: request cache
  * s_cache_resp: wait cache resp
  * s_misalign_merge_data: merge unaligned data
  * s_latch_and_merge_data: for read data
  * s_send_data: for send write data
  * s_wait_to_sbuffer: wait for data from the sbufferOut pipeline stage to be sent to the sbuffer
  * s_finish: normal uop is complete
  * s_fof_fix_vl: Writeback the uop of the fof instruction to modify vl.
  * */
  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_misalign_merge_data :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: s_fof_fix_vl :: Nil = Enum(14)
  val state             = RegInit(s_idle)
  val stateNext         = WireInit(s_idle)
  val sbufferEmpty      = io.flush_sbuffer.empty
  val isEnqfof          = io.in.bits.uop.fuOpType === VlduType.vleff && io.in.valid
  val isEnqFixVlUop     = isEnqfof && io.in.bits.uop.vpu.lastUop
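  // For fof (vleff) instructions the last uop does not access memory; it only fixes up vl at
  // writeback, so it bypasses the uop queue and is latched into fofBuffer (see the fof logic below).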

  // signals for handling misaligned accesses
  val curPtr             = RegInit(false.B)
  val canHandleMisalign  = WireInit(false.B)
  val isMisalignReg      = RegInit(false.B)
  val isMisalignWire     = WireInit(false.B)
  val notCross16ByteReg  = RegInit(false.B)
  val notCross16ByteWire = WireInit(false.B)
  val combinedData       = RegInit(0.U(XLEN.W))

  val lowPagePaddr       = RegInit(0.U(PAddrBits.W))
  val lowPageGPaddr      = RegInit(0.U(GPAddrBits.W))

  val highPagePaddr      = RegInit(0.U(PAddrBits.W))
  val highPageGPaddr     = RegInit(0.U(GPAddrBits.W))

  val isFirstSplit       = !curPtr
  val isSecondSplit      = curPtr
  /**
   * state update
   */
  state  := stateNext

  /**
   * state transfer
   */
  when(state === s_idle){
    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
  }.elsewhen(state === s_flush_sbuffer_req){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if sbuffer is empty, go to query tlb

  }.elsewhen(state === s_wait_flush_sbuffer_resp){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)

  }.elsewhen(state === s_tlb_req){
    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))

  }.elsewhen(state === s_wait_tlb_resp){
    stateNext := Mux(io.dtlb.resp.fire,
                      Mux(!io.dtlb.resp.bits.miss,
                          s_pm,
                          s_tlb_req),
                      s_wait_tlb_resp)

  }.elsewhen(state === s_pm){
    when(exception_pa || exception_va || exception_gpa) {
      stateNext := s_finish
    } .otherwise {
      when(canHandleMisalign && isMisalignWire && !notCross16ByteWire || (isMisalignReg && !notCross16ByteReg && isFirstSplit && isVSegStore)) {
        stateNext := s_tlb_req
      } .otherwise {
        /* a vector store sends its data to the sbuffer, so the dcache does not need to be queried */
        stateNext := Mux(isVSegLoad, s_cache_req, s_send_data)
      }
    }

  }.elsewhen(state === s_cache_req){
    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)

  }.elsewhen(state === s_cache_resp){
    when(io.rdcache.resp.fire) {
      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
        stateNext := s_cache_req
      }.otherwise {

        stateNext := Mux(isVSegLoad, Mux(isMisalignReg && !notCross16ByteReg, s_misalign_merge_data, s_latch_and_merge_data), s_send_data)
      }
    }.otherwise{
      stateNext := s_cache_resp
    }
  }.elsewhen(state === s_misalign_merge_data) {
    stateNext := Mux(!curPtr, s_tlb_req, s_latch_and_merge_data)
  }.elsewhen(state === s_latch_and_merge_data) {
    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
      ((segmentIdx === maxSegIdx) && !segmentActive)) {

      stateNext := s_finish // segment instruction finish
    }.otherwise {
      stateNext := s_tlb_req // need continue
    }
    /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_send_data) { // when sbuffer accept data
    when(!sbufferOut.fire && segmentActive || (isMisalignReg && !notCross16ByteReg && isFirstSplit)) {
      stateNext := s_send_data
    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
      stateNext := s_wait_to_sbuffer
    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
      stateNext := s_finish // segment instruction finish
    }.otherwise {
      stateNext := s_tlb_req // need continue
    }

  }.elsewhen(state === s_wait_to_sbuffer){
    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)

  }.elsewhen(state === s_finish){ // writeback uop
    stateNext := Mux(
      distanceBetween(enqPtr, deqPtr) === 0.U,
      Mux(fofBufferValid, s_fof_fix_vl, s_idle),
      s_finish
    )
  }.elsewhen(state === s_fof_fix_vl){ // writeback uop
    stateNext := Mux(!fofBufferValid, s_idle, s_fof_fix_vl)

  }.otherwise{
    stateNext := s_idle
    XSError(true.B, s"Unknown state!\n")
  }

  /*************************************************************************
   *                            enqueue logic
   *************************************************************************/
  io.in.ready                         := true.B
  val fuOpType                         = io.in.bits.uop.fuOpType
  val vtype                            = io.in.bits.uop.vpu.vtype
  val mop                              = fuOpType(6, 5)
  val instType                         = Cat(true.B, mop)
  val eew                              = io.in.bits.uop.vpu.veew
  val sew                              = vtype.vsew
  val lmul                             = vtype.vlmul
  val emul                             = EewLog2(eew) - sew + lmul
  val vl                               = instMicroOp.vl
  val vm                               = instMicroOp.uop.vpu.vm
  val vstart                           = instMicroOp.uop.vpu.vstart
  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
  // first uop enqueue, we need to latch microOp of segment instruction
  when(io.in.fire && !instMicroOpValid && !isEnqFixVlUop){
    // element number in a vd
    // TODO Rewrite it in a more elegant way.
    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
    instMicroOp.baseVaddr             := io.in.bits.src_rs1
    instMicroOpValid                  := true.B // if is first uop
    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew)
    instMicroOp.uop                   := io.in.bits.uop
    instMicroOp.mask                  := srcMask
    instMicroOp.vstart                := 0.U
    instMicroOp.uopFlowNum            := uopFlowNum
    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merge data
    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    instMicroOp.exceptionVl.valid     := false.B
    instMicroOp.exceptionVl.bits      := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    segmentOffset                     := 0.U
    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVSegLoad(io.in.bits.uop.fuType)
    instMicroOp.isVSegLoad            := FuType.isVSegLoad(io.in.bits.uop.fuType)
    instMicroOp.isVSegStore           := FuType.isVSegStore(io.in.bits.uop.fuType)
    isMisalignReg                     := false.B
    notCross16ByteReg                 := false.B
  }
  // latch data
  when(io.in.fire && !isEnqFixVlUop){
    data(enqPtr.value)                := io.in.bits.src_vs3
    stride(enqPtr.value)              := io.in.bits.src_stride
    uopq(enqPtr.value).uop            := io.in.bits.uop
  }

  // update enqPtr, only 1 port
  when(io.in.fire && !isEnqFixVlUop){
    enqPtr                            := enqPtr + 1.U
  }

  /*************************************************************************
   *                            output logic
   *************************************************************************/

  val indexStride                     = IndexAddr( // index for indexed instruction
                                                    index = stride(stridePtr.value),
                                                    flow_inner_idx = issueIndexIdx,
                                                    eew = issueEew
                                                  )
  val realSegmentOffset               = Mux(isIndexed(issueInstType),
                                            indexStride,
                                            segmentOffset)
  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset

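  // For a misaligned element the request address is re-based to the enclosing 8-byte
  // boundary: the first split uses the low 8-byte-aligned address, the second split the next
  // 8 bytes; notCross16ByteVaddr is the 16-byte-aligned address used when the element does
  // not cross a 16-byte boundary.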
  val misalignLowVaddr                = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
  val misalignHighVaddr               = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U
  val notCross16ByteVaddr             = Cat(latchVaddr(latchVaddr.getWidth - 1, 4), 0.U(4.W))
//  val misalignVaddr                   = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr))
  val misalignVaddr                   = Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr)
  val tlbReqVaddr                     = Mux(isMisalignReg, misalignVaddr, vaddr)
  // latch vaddr
  when(state === s_tlb_req && !isMisalignReg){
    latchVaddr := vaddr(VAddrBits - 1, 0)
  }
  /**
   * tlb req and tlb resp
   */

  // query DTLB IO Assign
  io.dtlb.req                         := DontCare
  io.dtlb.resp.ready                  := true.B
  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
  io.dtlb.req.bits.cmd                := Mux(isVSegLoad, TlbCmd.read, TlbCmd.write)
  io.dtlb.req.bits.vaddr              := tlbReqVaddr(VAddrBits - 1, 0)
  io.dtlb.req.bits.fullva             := tlbReqVaddr
  io.dtlb.req.bits.checkfullva        := true.B
  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
  io.dtlb.req.bits.memidx.is_ld       := isVSegLoad
  io.dtlb.req.bits.memidx.is_st       := isVSegStore
  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
  io.dtlb.req.bits.no_translate       := false.B
  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
  io.dtlb.req.bits.debug.isFirstIssue := DontCare
  io.dtlb.req_kill                    := false.B

  val canTriggerException              = segmentIdx === 0.U || !instMicroOp.isFof // only element 0, or a non-fof instruction, can trigger an exception

  val segmentTrigger = Module(new VSegmentTrigger)
  segmentTrigger.io.fromCsrTrigger.tdataVec             := io.fromCsrTrigger.tdataVec
  segmentTrigger.io.fromCsrTrigger.tEnableVec           := io.fromCsrTrigger.tEnableVec
  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
  segmentTrigger.io.fromCsrTrigger.debugMode            := io.fromCsrTrigger.debugMode
  segmentTrigger.io.memType                             := isVSegLoad
  segmentTrigger.io.fromLoadStore.vaddr                 := Mux(isMisalignReg, misalignVaddr, latchVaddr)
  segmentTrigger.io.fromLoadStore.isVectorUnitStride    := false.B
  segmentTrigger.io.fromLoadStore.mask                  := 0.U

  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
  val triggerBreakpoint = TriggerAction.isExp(triggerAction)

  // tlb resp
  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld
      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
      when(!io.dtlb.resp.bits.miss){
        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
        instMicroOp.exceptionVaddr    := io.dtlb.resp.bits.fullva
        instMicroOp.exceptionGpaddr   := io.dtlb.resp.bits.gpaddr(0)
        instMicroOp.exceptionIsForVSnonLeafPTE  := io.dtlb.resp.bits.isForVSnonLeafPTE
        lowPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.paddr(0), lowPagePaddr)
        lowPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.gpaddr(0), lowPageGPaddr)

        highPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.paddr(0), highPagePaddr)
        highPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.gpaddr(0), highPageGPaddr)
      }
  }
  // pmp
  // NOTE: only load/store exceptions are handled here; other kinds of exceptions must not be sent to this unit
  val exceptionWithPf = exceptionVec(storePageFault) || exceptionVec(loadPageFault) || exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
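  // if the TLB already reported a (guest) page fault, mask out the PMP response for this access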
  val pmp = (io.pmpResp.asUInt & Fill(io.pmpResp.asUInt.getWidth, !exceptionWithPf)).asTypeOf(new PMPRespBundle())
  when(state === s_pm) {
    val highAddress = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
      "b00".U -> 0.U,
      "b01".U -> 1.U,
      "b10".U -> 3.U,
      "b11".U -> 7.U
    )) + tlbReqVaddr(4, 0)

    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
      "b00".U   -> true.B,                   //b
      "b01".U   -> (tlbReqVaddr(0)    === 0.U), //h
      "b10".U   -> (tlbReqVaddr(1, 0) === 0.U), //w
      "b11".U   -> (tlbReqVaddr(2, 0) === 0.U)  //d
    ))

    notCross16ByteWire   := highAddress(4) === tlbReqVaddr(4)
    isMisalignWire       := !addr_aligned
    canHandleMisalign    := !pmp.mmio && !triggerBreakpoint && !triggerDebugMode
    exceptionVec(loadAddrMisaligned)  := isMisalignWire && isVSegLoad  && canTriggerException && !canHandleMisalign
    exceptionVec(storeAddrMisaligned) := isMisalignWire && isVSegStore && canTriggerException && !canHandleMisalign

    exception_va  := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
                     exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
                     triggerBreakpoint || triggerDebugMode || (isMisalignWire && !canHandleMisalign)
    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
    exception_pa  := pmp.st || pmp.ld || pmp.mmio

    instMicroOp.exception_pa  := exception_pa
    instMicroOp.exception_va  := exception_va
    instMicroOp.exception_gpa := exception_gpa
    // update storeAccessFault bit. Currently, we don't support vector MMIO
    exceptionVec(loadAccessFault)  := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio)   && isVSegLoad  && canTriggerException
    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio)  && isVSegStore && canTriggerException
    exceptionVec(breakPoint)       := triggerBreakpoint && canTriggerException

    exceptionVec(storePageFault)      := exceptionVec(storePageFault)      && isVSegStore && canTriggerException
    exceptionVec(loadPageFault)       := exceptionVec(loadPageFault)       && isVSegLoad  && canTriggerException
    exceptionVec(storeGuestPageFault) := exceptionVec(storeGuestPageFault) && isVSegStore && canTriggerException
    exceptionVec(loadGuestPageFault)  := exceptionVec(loadGuestPageFault)  && isVSegLoad  && canTriggerException

    when(exception_va || exception_gpa || exception_pa) {
      when(canTriggerException) {
        instMicroOp.exceptionVstart := segmentIdx // for exception
      }.otherwise {
        instMicroOp.exceptionVl.valid := true.B
        instMicroOp.exceptionVl.bits := segmentIdx
      }
    }

    when(exceptionVec(breakPoint) || triggerDebugMode) {
      instMicroOp.uop.trigger := triggerAction
    }

    when(isMisalignWire && canHandleMisalign && !(exception_va || exception_gpa || exception_pa)) {
      notCross16ByteReg   := notCross16ByteWire
      isMisalignReg       := true.B
      curPtr              := false.B
    }
  }

  /**
   * flush sbuffer IO Assign
   */
  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)

  /**
  * update curPtr
  * */
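  // curPtr selects which half of a cross-16-byte misaligned element is currently being
  // handled: loads set it after merging the low half, stores toggle it as the two halves are
  // translated and written; it is cleared once the element (or the whole uop) completes.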
  when(state === s_finish || state === s_latch_and_merge_data || state === s_send_data && stateNext =/= s_send_data) {
    isMisalignReg     := false.B
    notCross16ByteReg := false.B
    curPtr := false.B
  } .otherwise {
    when(isVSegLoad) {
      when(isMisalignReg && !notCross16ByteReg && state === s_misalign_merge_data) {
        curPtr := true.B
      }
    } .otherwise {
      when(isMisalignReg && !notCross16ByteReg && state === s_pm) {
        curPtr := !curPtr
      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_pm && stateNext === s_send_data) {
        curPtr := false.B
      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_send_data && stateNext === s_send_data && sbufferOut.fire) {
        curPtr := !curPtr
      }
    }
  }



  /**
   * merge data for load
   */
  val cacheData = LookupTree(latchVaddr(3,0), List(
    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
  ))

  val misalignLowData  = LookupTree(latchVaddr(3,0), List(
    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
  ))

  val misalignCombinedData = LookupTree(latchVaddr(3,0), List(
    "b1001".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(55,    0))(63, 0),
    "b1010".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(47,    0))(63, 0),
    "b1011".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(39,    0))(63, 0),
    "b1100".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(31,    0))(63, 0),
    "b1101".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(23,    0))(63, 0),
    "b1110".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(15,    0))(63, 0),
    "b1111".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(7,     0))(63, 0)
  ))
  when(state === s_misalign_merge_data && segmentActive){
    when(!curPtr) {
      combinedData := misalignLowData
    } .otherwise {
      combinedData := misalignCombinedData
    }
  }

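  // select the element bytes returned by the dcache: a misaligned element that does not cross
  // a 16-byte boundary is taken from the 128-bit response shifted by its byte offset, a
  // crossing element from the merged combinedData, and an aligned element from cacheData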
  val shiftData    = (io.rdcache.resp.bits.data_delayed >> (latchVaddr(3, 0) << 3)).asUInt(63, 0)
  val mergemisalignData = Mux(notCross16ByteReg, shiftData, combinedData)
  val pickData  = rdataVecHelper(alignedType(1,0), Mux(isMisalignReg, mergemisalignData, cacheData))
  val mergedData = mergeDataWithElemIdx(
    oldData = data(splitPtr.value),
    newData = Seq(pickData),
    alignedType = alignedType(1,0),
    elemIdx = Seq(elemIdxInVd),
    valids = Seq(true.B)
  )
  when(state === s_latch_and_merge_data && segmentActive){
    data(splitPtr.value) := mergedData
  }


  /**
   * split data for store
   * */
  val splitData = genVSData(
    data = data(splitPtr.value),
    elemIdx = elemIdxInVd,
    alignedType = alignedType
  )
  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
  val bmask     = genBasemask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
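  // for a misaligned access the physical address keeps the translated page number but
  // re-bases the page offset to the split address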
  val dcacheReqVaddr = Mux(isMisalignReg, misalignVaddr, latchVaddr)
  val dcacheReqPaddr = Mux(isMisalignReg, Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, PageOffsetWidth), misalignVaddr(PageOffsetWidth - 1, 0)), instMicroOp.paddr)
  /**
   * rdcache req; a store does not need to query the dcache, because its elements are written to the sbuffer
   */
  io.rdcache.req                    := DontCare
  io.rdcache.req.valid              := state === s_cache_req && isVSegLoad
  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
  io.rdcache.req.bits.vaddr         := dcacheReqVaddr
  io.rdcache.req.bits.mask          := mask
  io.rdcache.req.bits.data          := flowData
  io.rdcache.pf_source              := LOAD_SOURCE.U
  io.rdcache.req.bits.id            := DontCare
  io.rdcache.resp.ready             := true.B
  io.rdcache.s1_paddr_dup_lsu       := dcacheReqPaddr
  io.rdcache.s1_paddr_dup_dcache    := dcacheReqPaddr
  io.rdcache.s1_kill                := false.B
  io.rdcache.s1_kill_data_read      := false.B
  io.rdcache.s2_kill                := false.B
  if (env.FPGAPlatform){
    io.rdcache.s0_pc                := DontCare
    io.rdcache.s1_pc                := DontCare
    io.rdcache.s2_pc                := DontCare
  }else{
    io.rdcache.s0_pc                := instMicroOp.uop.pc
    io.rdcache.s1_pc                := instMicroOp.uop.pc
    io.rdcache.s2_pc                := instMicroOp.uop.pc
  }
  io.rdcache.replacementUpdated     := false.B
  io.rdcache.is128Req               := notCross16ByteReg


  /**
   * write data to sbuffer
   * */
  val sbufferAddrLow4bit = latchVaddr(3, 0)

  val notCross16BytePaddr          = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 4), 0.U(4.W))
  val notCross16ByteData           = flowData << (sbufferAddrLow4bit << 3)

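  // a store element that crosses a 16-byte boundary is expanded into a 32-byte window aligned
  // to the 16-byte line and written to the sbuffer as two halves: dataLow/maskLow on the first
  // split, dataHigh/maskHigh on the second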
  val Cross16ByteMask = Wire(UInt(32.W))
  val Cross16ByteData = Wire(UInt(256.W))
  Cross16ByteMask := bmask << sbufferAddrLow4bit
  Cross16ByteData := flowData << (sbufferAddrLow4bit << 3)

  val vaddrLow  = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
  val vaddrHigh = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U


  val paddrLow  = Cat(lowPagePaddr(lowPagePaddr.getWidth - 1, 3), 0.U(3.W))
  val paddrHigh = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 3), 0.U(3.W))

  val maskLow   = Cross16ByteMask(15, 0)
  val maskHigh  = Cross16ByteMask(31, 16)

  val dataLow   = Cross16ByteData(127, 0)
  val dataHigh  = Cross16ByteData(255, 128)

  val sbuffermisalignMask          = Mux(notCross16ByteReg, wmask, Mux(isFirstSplit, maskLow, maskHigh))
  val sbuffermisalignData          = Mux(notCross16ByteReg, notCross16ByteData, Mux(isFirstSplit, dataLow, dataHigh))
  val sbuffermisalignPaddr         = Mux(notCross16ByteReg, notCross16BytePaddr, Mux(isFirstSplit, paddrLow, paddrHigh))
  val sbuffermisalignVaddr         = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, vaddrLow, vaddrHigh))

  val sbufferMask                  = Mux(isMisalignReg, sbuffermisalignMask, wmask)
  val sbufferData                  = Mux(isMisalignReg, sbuffermisalignData, flowData)
  val sbufferVaddr                 = Mux(isMisalignReg, sbuffermisalignVaddr, latchVaddr)
  val sbufferPaddr                 = Mux(isMisalignReg, sbuffermisalignPaddr, instMicroOp.paddr)

  dontTouch(wmask)
  dontTouch(Cross16ByteMask)
  sbufferOut.bits                  := DontCare
  sbufferOut.valid                 := state === s_send_data && segmentActive
  sbufferOut.bits.vecValid         := state === s_send_data && segmentActive
  sbufferOut.bits.mask             := sbufferMask
  sbufferOut.bits.data             := sbufferData
  sbufferOut.bits.vaddr            := sbufferVaddr
  sbufferOut.bits.cmd              := MemoryOpConstants.M_XWR
  sbufferOut.bits.id               := DontCare
  sbufferOut.bits.addr             := sbufferPaddr

  NewPipelineConnect(
    sbufferOut, io.sbuffer, io.sbuffer.fire,
    false.B,
    Option(s"VSegmentUnitPipelineConnect")
  )

  io.vecDifftestInfo.valid         := io.sbuffer.valid
  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop

  /**
   * update ptr
   * */
  private val fieldActiveWirteFinish = sbufferOut.fire && segmentActive // data write finished and the segment is active
  XSError(sbufferOut.fire && !segmentActive, "Attempted to write an inactive segment to the sbuffer, something is wrong!\n")

  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data && stateNext =/= s_send_data)) && !segmentActive

  val splitPtrOffset = Mux(
    isIndexed(instType),
    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
  )
  splitPtrNext :=
    Mux(fieldIdx === maxNfields || !segmentActive, // last field done or segment inactive: jump to the next segment; otherwise stay in this segment
      // segment finished: shift right by 'issueUopFlowNumLog2' so that emul > 1 still yields the correct register offset.
     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
      // next field.
     (splitPtr + splitPtrOffset)
    )

  if (backendParams.debugEn){
    dontTouch(issueUopFlowNumLog2)
    dontTouch(issueEmul)
    dontTouch(splitPtrNext)
    dontTouch(stridePtr)
    dontTouch(segmentActive)
  }

  // update splitPtr
  when(state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && (fieldActiveWirteFinish || !segmentActive))){
    splitPtr := splitPtrNext
  }.elsewhen(io.in.fire && !instMicroOpValid){
    splitPtr := deqPtr // initial splitPtr
  }

  // update stridePtr, only use in index
  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
  stridePtr       := deqPtr + strideOffset

  // update fieldIdx
  when(io.in.fire && !instMicroOpValid){ // init
    fieldIdx := 0.U
  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
            (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)){ // only if segment is active

    /* next segment, only if segment complete */
    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to next segment
    fieldIdx := 0.U
  }
  // update segmentIdx
  when(io.in.fire && !instMicroOpValid){
    segmentIdx := 0.U
  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)) &&
             segmentIdx =/= maxSegIdx){ // next segment, only if segment is active

    segmentIdx := segmentIdx + 1.U
  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if segment is inactive, go to next segment
    segmentIdx := segmentIdx + 1.U
  }

  // update segmentOffset
  /* increase segmentOffset whether the segment is active or inactive */
  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish))) ||
       segmentInactiveFinish){

    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
  }

  // update deqPtr
  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
    deqPtr := deqPtr + 1.U
  }


  /*************************************************************************
   *                            fof logic
   *************************************************************************/

  // Enq
  when(isEnqFixVlUop && !fofBufferValid) { fofBuffer := io.in.bits.uop }
  when(isEnqFixVlUop && !fofBufferValid) { fofBufferValid := true.B }

  // Deq
  val fofFixVlValid                    = state === s_fof_fix_vl && fofBufferValid

  when(fofFixVlValid) { fofBuffer      := 0.U.asTypeOf(new DynInst) }
  when(fofFixVlValid) { fofBufferValid := false.B }


  /*************************************************************************
   *                            dequeue logic
   *************************************************************************/
  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
  /* select the mask of vd; may be removed in the future */
  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
  val maskUsed      = maskDataVec(vdIdxInField)

  when(stateNext === s_idle){
    instMicroOpValid := false.B
  }
  // writeback to backend
  val writebackOut                     = WireInit(io.uopwriteback.bits)
  val writebackValid                   = (state === s_finish) && !isEmpty(enqPtr, deqPtr) || fofFixVlValid

  when(fofFixVlValid) {
    writebackOut.uop                    := fofBuffer
    writebackOut.uop.vpu.vl             := instMicroOp.exceptionVl.bits
    writebackOut.data                   := instMicroOp.exceptionVl.bits
    writebackOut.mask.get               := Fill(VLEN, 1.U)
    writebackOut.uop.vpu.vmask          := Fill(VLEN, 1.U)
  }.otherwise{
    writebackOut.uop                    := uopq(deqPtr.value).uop
    writebackOut.uop.vpu                := instMicroOp.uop.vpu
    writebackOut.uop.trigger            := instMicroOp.uop.trigger
    writebackOut.uop.exceptionVec       := instMicroOp.uop.exceptionVec
    writebackOut.mask.get               := instMicroOp.mask
    writebackOut.data                   := data(deqPtr.value)
    writebackOut.vdIdx.get              := vdIdxInField
    writebackOut.uop.vpu.vl             := Mux(instMicroOp.exceptionVl.valid, instMicroOp.exceptionVl.bits, instMicroOp.vl)
    writebackOut.uop.vpu.vstart         := Mux(instMicroOp.uop.exceptionVec.asUInt.orR || TriggerAction.isDmode(instMicroOp.uop.trigger), instMicroOp.exceptionVstart, instMicroOp.vstart)
    writebackOut.uop.vpu.vmask          := maskUsed
    writebackOut.uop.vpu.vuopIdx        := uopq(deqPtr.value).uop.vpu.vuopIdx
    // when an exception updates vl, the tail-undisturbed (vtu) policy should be used.
    writebackOut.uop.vpu.vta            := Mux(instMicroOp.exceptionVl.valid, VType.tu, instMicroOp.uop.vpu.vta)
    writebackOut.debug                  := DontCare
    writebackOut.vdIdxInField.get       := vdIdxInField
    writebackOut.uop.robIdx             := instMicroOp.uop.robIdx
    writebackOut.uop.fuOpType           := instMicroOp.uop.fuOpType
  }

  io.uopwriteback.valid               := RegNext(writebackValid)
  io.uopwriteback.bits                := RegEnable(writebackOut, writebackValid)

  dontTouch(writebackValid)

  // to RS
  val feedbackOut                      = WireInit(0.U.asTypeOf(io.feedback.bits))
  val feedbackValid                    = state === s_finish && !isEmpty(enqPtr, deqPtr)
  feedbackOut.hit                     := true.B
  feedbackOut.robIdx                  := instMicroOp.uop.robIdx
  feedbackOut.sourceType              := DontCare
  feedbackOut.flushState              := DontCare
  feedbackOut.dataInvalidSqIdx        := DontCare
  feedbackOut.sqIdx                   := uopq(deqPtr.value).uop.sqIdx
  feedbackOut.lqIdx                   := uopq(deqPtr.value).uop.lqIdx

  io.feedback.valid                   := RegNext(feedbackValid)
  io.feedback.bits                    := RegEnable(feedbackOut, feedbackValid)

  dontTouch(feedbackValid)

  // exception
  io.exceptionInfo                    := DontCare
  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
  io.exceptionInfo.bits.vstart        := instMicroOp.exceptionVstart
  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionVaddr
  io.exceptionInfo.bits.gpaddr        := instMicroOp.exceptionGpaddr
  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
  io.exceptionInfo.bits.vl            := instMicroOp.exceptionVl.bits
  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
}