xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision 800ac0f1d01fac5d118955113cd5a0cc7844aff4)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.diplomacy._
23import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25import freechips.rocketchip.tile.HasFPUParameters
26import freechips.rocketchip.tilelink._
27import device.MsiInfoBundle
28import utils._
29import utility._
30import system.SoCParamsKey
31import xiangshan._
32import xiangshan.ExceptionNO._
33import xiangshan.frontend.HasInstrMMIOConst
34import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
35import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
36import xiangshan.backend.exu.MemExeUnit
37import xiangshan.backend.fu._
38import xiangshan.backend.fu.FuType._
39import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil}
40import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
41import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
42import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
43import xiangshan.backend.datapath.NewPipelineConnect
44import xiangshan.backend.trace.{Itype, TraceCoreInterface}
45import xiangshan.backend.Bundles._
46import xiangshan.mem._
47import xiangshan.mem.mdp._
48import xiangshan.mem.Bundles._
49import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
50import xiangshan.cache._
51import xiangshan.cache.mmu._
52import coupledL2.{PrefetchRecv}
53
54trait HasMemBlockParameters extends HasXSParameter {
55  // number of memory units
56  val LduCnt  = backendParams.LduCnt
57  val StaCnt  = backendParams.StaCnt
58  val StdCnt  = backendParams.StdCnt
59  val HyuCnt  = backendParams.HyuCnt
60  val VlduCnt = backendParams.VlduCnt
61  val VstuCnt = backendParams.VstuCnt
62
63  val LdExuCnt  = LduCnt + HyuCnt
64  val StAddrCnt = StaCnt + HyuCnt
65  val StDataCnt = StdCnt
66  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
67  val MemAddrExtCnt = LdExuCnt + StaCnt
68  val MemVExuCnt = VlduCnt + VstuCnt
69
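  // Writeback-port indices on the scalar load writeback path (see the ldaExeWbReqs
  // wiring in MemBlockInlinedImp): port 0 is shared with the atomics unit, port 1 with
  // the load misalign buffer, and port 2 carries uncache/LSQ load writebacks through
  // its load unit. NCWBPorts presumably lists the ports that may carry non-cacheable
  // (NC) load writebacks.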
70  val AtomicWBPort   = 0
71  val MisalignWBPort = 1
72  val UncacheWBPort  = 2
73  val NCWBPorts = Seq(1, 2)
74}
75
76abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters
77
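// Std is the store-data functional unit: a simple pass-through that forwards src(0) as
// the result data and copies robIdx, with in.ready driven directly by out.ready.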
78class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
79  io.in.ready := io.out.ready
80  io.out.valid := io.in.valid
81  io.out.bits := 0.U.asTypeOf(io.out.bits)
82  io.out.bits.res.data := io.in.bits.data.src(0)
83  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
84}
85
86class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
87  val backendToTopBypass = Flipped(new BackendToTopBundle)
88
89  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
90  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
91  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
92  val sfence = Input(new SfenceBundle)
93  val tlbCsr = Input(new TlbCsrBundle)
94  val lsqio = new Bundle {
95    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
96    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
97    val pendingMMIOld = Input(Bool())
98    val pendingld = Input(Bool())
99    val pendingst = Input(Bool())
100    val pendingVst = Input(Bool())
101    val commit = Input(Bool())
102    val pendingPtr = Input(new RobPtr)
103    val pendingPtrNext = Input(new RobPtr)
104  }
105
106  val isStoreException = Input(Bool())
107  val isVlsException = Input(Bool())
108  val csrCtrl = Flipped(new CustomCSRCtrlIO)
109  val enqLsq = new LsqEnqIO
110  val flushSb = Input(Bool())
111
112  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
113  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
114
115  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
116  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
117  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
118  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
119  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))
120
121  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
122}
123
124class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
125  val topToBackendBypass = new TopToBackendBundle
126
127  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
128  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
129  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
130  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
131  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
132  // used by the VLSU issue queue: a vector store must wait for all earlier stores, and a vector load for all earlier loads
133  val sqDeqPtr = Output(new SqPtr)
134  val lqDeqPtr = Output(new LqPtr)
135  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
136  val stIssuePtr = Output(new SqPtr())
137
138  val memoryViolation = ValidIO(new Redirect)
139  val sbIsEmpty = Output(Bool())
140
141  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))
142
143  val lsqio = new Bundle {
144    val vaddr = Output(UInt(XLEN.W))
145    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
146    val vl = Output(UInt((log2Up(VLEN) + 1).W))
147    val gpaddr = Output(UInt(XLEN.W))
148    val isForVSnonLeafPTE = Output(Bool())
149    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
150    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
151    val lqCanAccept = Output(Bool())
152    val sqCanAccept = Output(Bool())
153  }
154
155  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
156    val robidx = Output(new RobPtr)
157    val pc     = Input(UInt(VAddrBits.W))
158  })
159
160  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
161  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
162  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
163  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
164  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
165  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
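  // Note: the concatenation order below (sta, hyuLda, hyuSta, lda, vldu, std) fixes the
  // order of the combined writeback sequence; presumably it must match the writeback
  // port layout expected by the backend.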
166  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
167    writebackSta ++
168      writebackHyuLda ++ writebackHyuSta ++
169      writebackLda ++
170      writebackVldu ++
171      writebackStd
172  }
173
174  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
175  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
176  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
177  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
178  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
179  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
180  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))
181
182  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
183}
184
185class MemCoreTopDownIO extends Bundle {
186  val robHeadMissInDCache = Output(Bool())
187  val robHeadTlbReplay = Output(Bool())
188  val robHeadTlbMiss = Output(Bool())
189  val robHeadLoadVio = Output(Bool())
190  val robHeadLoadMSHR = Output(Bool())
191}
192
193class fetch_to_mem(implicit p: Parameters) extends XSBundle{
194  val itlb = Flipped(new TlbPtwIO())
195}
196
197// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
198class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
199  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
200  lazy val module = new InstrUncacheBufferImpl
201
202  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
203    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
204      out.a <> BufferParams.default(BufferParams.default(in.a))
205      in.d <> BufferParams.default(BufferParams.default(out.d))
206
207      // only a.valid, a.ready and a.address actually change
208      // the remaining A-channel fields are tied off below so they can be optimized away, keeping the MemBlock port unchanged after adding the buffer
209      out.a.bits.data := 0.U
210      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
211      out.a.bits.opcode := 4.U // Get
212      out.a.bits.size := log2Ceil(mmioBusBytes).U
213      out.a.bits.source := 0.U
214    }
215  }
216}
217
218// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
219class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
220  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
221  lazy val module = new ICacheBufferImpl
222
223  class ICacheBufferImpl extends LazyModuleImp(this) {
224    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
225      out.a <> BufferParams.default(BufferParams.default(in.a))
226      in.d <> BufferParams.default(BufferParams.default(out.d))
227    }
228  }
229}
230
231class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
232  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
233  lazy val module = new ICacheCtrlBufferImpl
234
235  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
236    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
237      out.a <> BufferParams.default(BufferParams.default(in.a))
238      in.d <> BufferParams.default(BufferParams.default(out.d))
239    }
240  }
241}
242
243// Frontend bus goes through MemBlock
244class FrontendBridge()(implicit p: Parameters) extends LazyModule {
245  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
246  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
247  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
248  lazy val module = new LazyModuleImp(this) {
249  }
250}
251
252class MemBlockInlined()(implicit p: Parameters) extends LazyModule
253  with HasXSParameter {
254  override def shouldBeInlined: Boolean = true
255
256  val dcache = LazyModule(new DCacheWrapper())
257  val uncache = LazyModule(new Uncache())
258  val uncache_port = TLTempNode()
259  val uncache_xbar = TLXbar()
260  val ptw = LazyModule(new L2TLBWrapper())
261  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
262  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
263  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
264  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
265    BundleBridgeSource(() => new PrefetchRecv)
266  )
267  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
268    BundleBridgeSource(() => new huancun.PrefetchRecv)
269  ) else None
270  val frontendBridge = LazyModule(new FrontendBridge)
271  // interrupt sinks
272  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
273  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
274  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
275  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
276
277  if (!coreParams.softPTW) {
278    ptw_to_l2_buffer.node := ptw.node
279  }
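  // uncache.clientNode feeds uncache_xbar through a single TLBuffer; the xbar then fans
  // out, through two-stage TLBuffer chains, to the DCache's uncache node (when present)
  // and to the external uncache_port.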
280  uncache_xbar := TLBuffer() := uncache.clientNode
281  if (dcache.uncacheNode.isDefined) {
282    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
283  }
284  uncache_port := TLBuffer.chainNode(2) := uncache_xbar
285
286  lazy val module = new MemBlockInlinedImp(this)
287}
288
289class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
290  with HasXSParameter
291  with HasFPUParameters
292  with HasPerfEvents
293  with HasL1PrefetchSourceParameter
294  with HasCircularQueuePtrHelper
295  with HasMemBlockParameters
296  with HasTlbConst
297  with SdtrigExt
298{
299  val io = IO(new Bundle {
300    val hartId = Input(UInt(hartIdLen.W))
301    val redirect = Flipped(ValidIO(new Redirect))
302
303    val ooo_to_mem = new ooo_to_mem
304    val mem_to_ooo = new mem_to_ooo
305    val fetch_to_mem = new fetch_to_mem
306
307    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
308
309    // misc
310    val error = ValidIO(new L1CacheErrorInfo)
311    val memInfo = new Bundle {
312      val sqFull = Output(Bool())
313      val lqFull = Output(Bool())
314      val dcacheMSHRFull = Output(Bool())
315    }
316    val debug_ls = new DebugLSIO
317    val l2_hint = Input(Valid(new L2ToL1Hint()))
318    val l2PfqBusy = Input(Bool())
319    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
320    val l2_pmp_resp = new PMPRespBundle
321    val l2_flush_done = Input(Bool())
322
323    val debugTopDown = new Bundle {
324      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
325      val toCore = new MemCoreTopDownIO
326    }
327    val debugRolling = Flipped(new RobDebugRollingIO)
328
329    // All signals between the frontend/backend and the bus go through MemBlock
330    val fromTopToBackend = Input(new Bundle {
331      val msiInfo   = ValidIO(new MsiInfoBundle)
332      val clintTime = ValidIO(UInt(64.W))
333    })
334    val inner_hartId = Output(UInt(hartIdLen.W))
335    val inner_reset_vector = Output(UInt(PAddrBits.W))
336    val outer_reset_vector = Input(UInt(PAddrBits.W))
337    val outer_cpu_halt = Output(Bool())
338    val outer_l2_flush_en = Output(Bool())
339    val outer_power_down_en = Output(Bool())
340    val outer_cpu_critical_error = Output(Bool())
341    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
342    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
343    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
344    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
345
346    // reset signals of frontend & backend are generated in memblock
347    val reset_backend = Output(Reset())
348    // Reset signal from the frontend.
349    val resetInFrontendBypass = new Bundle{
350      val fromFrontend = Input(Bool())
351      val toL2Top      = Output(Bool())
352    }
353    val traceCoreInterfaceBypass = new Bundle{
354      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
355      val toL2Top     = new TraceCoreInterface
356    }
357
358    val topDownInfo = new Bundle {
359      val fromL2Top = Input(new TopDownFromL2Top)
360      val toBackend = Flipped(new TopDownInfo)
361    }
362  })
363
364  dontTouch(io.inner_hartId)
365  dontTouch(io.inner_reset_vector)
366  dontTouch(io.outer_reset_vector)
367  dontTouch(io.outer_cpu_halt)
368  dontTouch(io.outer_l2_flush_en)
369  dontTouch(io.outer_power_down_en)
370  dontTouch(io.outer_cpu_critical_error)
371  dontTouch(io.inner_beu_errors_icache)
372  dontTouch(io.outer_beu_errors_icache)
373  dontTouch(io.inner_hc_perfEvents)
374  dontTouch(io.outer_hc_perfEvents)
375
376  val redirect = RegNextWithEnable(io.redirect)
377
378  private val dcache = outer.dcache.module
379  val uncache = outer.uncache.module
380
381  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)
382
383  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
384  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
385  io.error <> DelayNWithValid(dcache.io.error, 2)
386  when(!csrCtrl.cache_error_enable){
387    io.error.bits.report_to_beu := false.B
388    io.error.valid := false.B
389  }
390
391  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
392  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
393  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
394  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
395  val stData = stdExeUnits.map(_.io.out)
396  val exeUnits = loadUnits ++ storeUnits
397
398  // The number of vector load/store units is decoupled from the number of scalar load/store units
399  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
400  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
401  val vlMergeBuffer = Module(new VLMergeBufferImp)
402  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
403  val vSegmentUnit  = Module(new VSegmentUnit)
404  val vfofBuffer    = Module(new VfofBuffer)
405
406  // misalign Buffer
407  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
408  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
409
410  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
411  dcache.io.sms_agt_evict_req.ready := false.B
412  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
413    case _: SMSParams =>
414      val sms = Module(new SMSPrefetcher())
415      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
416      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
417      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
418      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
419      sms.io_stride_en := false.B
420      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
421      sms
422  }
423  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
424  val hartId = p(XSCoreParamsKey).HartId
425  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
426    case _ =>
427      val l1Prefetcher = Module(new L1Prefetcher())
428      l1Prefetcher.io.enable := Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
429      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
430      l1Prefetcher.l2PfqBusy := io.l2PfqBusy
431
432      // stride will train on miss or prefetch hit
433      for (i <- 0 until LduCnt) {
434        val source = loadUnits(i).io.prefetch_train_l1
435        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
436          source.bits.miss || isFromStride(source.bits.meta_prefetch)
437        )
438        l1Prefetcher.stride_train(i).bits := source.bits
439        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
440        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
441          loadUnits(i).io.s2_ptr_chasing,
442          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
443          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
444        )
445      }
446      for (i <- 0 until HyuCnt) {
447        val source = hybridUnits(i).io.prefetch_train_l1
448        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
449          source.bits.miss || isFromStride(source.bits.meta_prefetch)
450        )
451        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
452        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
453          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
454          RegNext(io.ooo_to_mem.hybridPc(i)),
455          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
456        )
457      }
458      l1Prefetcher
459  }
460  // load prefetch to l1 Dcache
461  l1PrefetcherOpt match {
462    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
463    case None =>
464      l1_pf_req.valid := false.B
465      l1_pf_req.bits := DontCare
466  }
467  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
468
469  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
470  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
471  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
472  val atomicsUnit = Module(new AtomicsUnit)
473
474
475  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
476  // atomicsUnit will overwrite the source from ldu if it is about to writeback
477  val atomicWritebackOverride = Mux(
478    atomicsUnit.io.out.valid,
479    atomicsUnit.io.out.bits,
480    loadUnits(AtomicWBPort).io.ldout.bits
481  )
482  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
483  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
484  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
485  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready
486
487  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
488    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
489  )
490
491  // on this port the load unit has priority; the misalignBuffer only writes back when ldout is idle
492  val misalignWritebackOverride = Mux(
493    loadUnits(MisalignWBPort).io.ldout.valid,
494    loadUnits(MisalignWBPort).io.ldout.bits,
495    loadMisalignBuffer.io.writeBack.bits
496  )
497  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
498  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
499  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
500  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
501  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
502  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
503  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
504
505  // loadUnit will overwrite the source from uncache if it is about to writeback
506  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
507  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
508  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
509  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
510    x._1.bits  := x._2.io.out.bits
511    // AMOs do not need to write back std now.
512    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
513  }
514  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
515  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
516  io.mem_to_ooo.otherFastWakeup := DontCare
517  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
518  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
519  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
520
521  // prefetch to l1 req
522  // Stream's confidence is always 1
523  // (LduCnt + HyuCnt) l1_pf_reqs ?
524  loadUnits.foreach(load_unit => {
525    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
526    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
527  })
528
529  hybridUnits.foreach(hybrid_unit => {
530    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
531    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
532  })
533
534  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
535  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
536  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
537  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
538  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
539
540  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
541                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
542  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
543                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
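  // The queued L1 prefetch request is accepted if any load/hybrid pipeline can take it:
  // low-confidence ports always use their low-confidence acceptance signal, the other
  // ports select by the request's confidence field.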
544  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
545    case i => {
546      if (LowConfPorts.contains(i)) {
547        loadUnits(i).io.canAcceptLowConfPrefetch
548      } else {
549        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
550      }
551    }
552  }.reduce(_ || _)
553
554  // l1 pf fuzzer interface
555  val DebugEnableL1PFFuzzer = false
556  if (DebugEnableL1PFFuzzer) {
557    // l1 pf req fuzzer
558    val fuzzer = Module(new L1PrefetchFuzzer())
559    fuzzer.io.vaddr := DontCare
560    fuzzer.io.paddr := DontCare
561
562    // override load_unit prefetch_req
563    loadUnits.foreach(load_unit => {
564      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
565      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
566    })
567
568    // override hybrid_unit prefetch_req
569    hybridUnits.foreach(hybrid_unit => {
570      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
571      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
572    })
573
574    fuzzer.io.req.ready := l1_pf_req.ready
575  }
576
577  // TODO: fast load wakeup
578  val lsq     = Module(new LsqWrapper)
579  val sbuffer = Module(new Sbuffer)
580  // if you want to stress-test dcache stores, use FakeSbuffer
581  // val sbuffer = Module(new FakeSbuffer) // out of date now
582  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
583
584  dcache.io.hartId := io.hartId
585  lsq.io.hartId := io.hartId
586  sbuffer.io.hartId := io.hartId
587  atomicsUnit.io.hartId := io.hartId
588
589  dcache.io.lqEmpty := lsq.io.lqEmpty
590
591  // load/store prefetch to l2 cache
592  prefetcherOpt.foreach(sms_pf => {
593    l1PrefetcherOpt.foreach(l1_pf => {
594      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
595      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
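      // If both prefetchers present a valid L2 request in the same cycle, the L1 stream
      // prefetcher wins the mux below and the SMS request is dropped (counted by the
      // "sms_block_by_l1pf" perf counter).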
596
597      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
598      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
599      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
600      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
601
602      sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
603
604      val l2_trace = Wire(new LoadPfDbBundle)
605      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
606      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
607      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
608      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
609
610      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
611      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
612      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
613      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
614
615      val l3_trace = Wire(new LoadPfDbBundle)
616      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
617      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
618      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
619
620      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
621      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
622      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
623      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
624      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
625    })
626  })
627
628  // ptw
629  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
630  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
631  private val ptw = outer.ptw.module
632  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
633  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
634  ptw.io.hartId := io.hartId
635  ptw.io.sfence <> sfence
636  ptw.io.csr.tlb <> tlbcsr
637  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
638
639  val perfEventsPTW = if (!coreParams.softPTW) {
640    ptw.getPerfEvents
641  } else {
642    Seq()
643  }
644
645  // dtlb
646  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
647  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
648  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
649  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
650  val dtlb_st = Seq(dtlb_st_tlb_st.io)
651  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
652  /* tlb vec && constant variable */
653  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
654  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
655  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
656  val DTlbSize = TlbSubSizeVec.sum
657  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
658  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
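  // TlbStartVec/TlbEndVec are prefix sums over TlbSubSizeVec, giving each group's port
  // index range in the flattened DTLB request vector. For example, assuming LduCnt = 2,
  // HyuCnt = 1 and StaCnt = 2 (illustrative values only): TlbSubSizeVec = Seq(4, 2, 2),
  // TlbStartVec = Seq(0, 4, 6), TlbEndVec = Seq(4, 6, 8).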
659
660  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
661  val dtlb_reqs = dtlb.map(_.requestor).flatten
662  val dtlb_pmps = dtlb.map(_.pmp).flatten
663  dtlb.map(_.hartId := io.hartId)
664  dtlb.map(_.sfence := sfence)
665  dtlb.map(_.csr := tlbcsr)
666  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
667  dtlb.map(_.redirect := redirect)
668  if (refillBothTlb) {
669    require(ldtlbParams.outReplace == sttlbParams.outReplace)
670    require(ldtlbParams.outReplace == hytlbParams.outReplace)
671    require(ldtlbParams.outReplace == pftlbParams.outReplace)
672    require(ldtlbParams.outReplace)
673
674    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
675    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
676  } else {
677    // TODO: TlbReplace will be buggy when outReplace is enabled, since the ordering of the Hyu ports is not correct.
678    if (ldtlbParams.outReplace) {
679      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
680      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
681    }
682    if (hytlbParams.outReplace) {
683      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
684      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
685    }
686    if (sttlbParams.outReplace) {
687      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
688      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
689    }
690    if (pftlbParams.outReplace) {
691      val replace_pf = Module(new TlbReplace(2, pftlbParams))
692      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
693    }
694  }
695
696  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
697  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
698  ptwio.resp.ready := true.B
699
700  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
701  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
702  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
703
704  if (backendParams.debugEn){ dontTouch(tlbreplay) }
705
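  // tlbreplay(i): the load's PTW request hits in the PTW response registered this cycle.
  // Such requests are suppressed by the filter below (ptwio.req(i).valid), presumably so
  // the load replays (via tlb_hint.full) instead of waiting for a PTW response that will
  // never be issued.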
706  for (i <- 0 until LdExuCnt) {
707    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
708      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
709  }
710
711  dtlb.flatMap(a => a.ptw.req)
712    .zipWithIndex
713    .foreach{ case (tlb, i) =>
714      tlb.ready := ptwio.req(i).ready
715      ptwio.req(i).bits := tlb.bits
716    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
717      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
718      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
719      else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
720    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
721  }
722  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
723  if (refillBothTlb) {
724    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
725  } else {
726    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
727    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
728    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
729  }
730  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
731  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
732  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
733
734  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
735  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
736
737  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
738
739  // pmp
740  val pmp = Module(new PMP())
741  pmp.io.distribute_csr <> csrCtrl.distribute_csr
742
743  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
744  val pmp_check = pmp_checkers.map(_.io)
745  for ((p,d) <- pmp_check zip dtlb_pmps) {
746    if (HasBitmapCheck) {
747      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
748    } else {
749      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
750    }
751    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
752  }
753
754  for (i <- 0 until LduCnt) {
755    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
756  }
757  for (i <- 0 until HyuCnt) {
758    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
759  }
760  for (i <- 0 until StaCnt) {
761    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
762  }
763  for (i <- 0 until HyuCnt) {
764    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
765  }
766
767  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
768
769  // trigger
770  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
771  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
772  tEnable := csrCtrl.mem_trigger.tEnableVec
773  when(csrCtrl.mem_trigger.tUpdate.valid) {
774    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
775  }
776  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
777  val debugMode = csrCtrl.mem_trigger.debugMode
778
779  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
780  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
781
782  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
783  for (j <- 0 until TriggerNum)
784    PrintTriggerInfo(tEnable(j), tdata(j))
785
786  // Segment instructions are executed atomically:
787  // once a segment instruction starts executing, no other instructions should be executed until it completes.
788  val vSegmentFlag = RegInit(false.B)
789
790  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
791    vSegmentFlag := true.B
792  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
793    vSegmentFlag := false.B
794  }
795
796  // LoadUnit
797  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
798
799  for (i <- 0 until LduCnt) {
800    loadUnits(i).io.redirect <> redirect
801
802    // get input form dispatch
803    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
804    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
805    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
806    loadUnits(i).io.correctMissTrain := correctMissTrain
807    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
808    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
809
810    // vector
811    if (i < VlduCnt) {
812      loadUnits(i).io.vecldout.ready := false.B
813    } else {
814      loadUnits(i).io.vecldin.valid := false.B
815      loadUnits(i).io.vecldin.bits := DontCare
816      loadUnits(i).io.vecldout.ready := false.B
817    }
818
819    // fast replay
820    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
821
822    // SoftPrefetch to frontend (prefetch.i)
823    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
824
825    // dcache access
826    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
827    if(i == 0){
828      vSegmentUnit.io.rdcache := DontCare
829      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
830      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
831        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
832        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
833      ))
834      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
835    }
836
837    // Dcache requests must also be preempted by the segment.
838    when(vSegmentFlag){
839      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
840
841      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
842      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
843      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
844      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
845      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
846      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
847      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
848      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
849      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
850    }.otherwise {
851      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
852
853      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
854      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
855      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
856      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
857      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
858      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
859      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
860      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
861      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
862    }
863
864    // forward
865    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
866    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
867    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
868    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
869    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
870    // ld-ld violation check
871    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
872    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
873    loadUnits(i).io.csrCtrl       <> csrCtrl
874    // dcache refill req
875  // loadUnits(i).io.refill           <> delayedDcacheRefill
876    // dtlb
877    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
878    if (i == 0) { // port 0 is shared with vSegmentUnit
879      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // the segment TLB request needs to be delayed by 1 cycle
880      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
881      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
882      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
883        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
884        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
885      ))
886    }
887    // pmp
888    loadUnits(i).io.pmp <> pmp_check(i).resp
889    // st-ld violation query
890    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
891    for (s <- 0 until StorePipelineWidth) {
892      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
893    }
894    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
895    // load prefetch train
896    prefetcherOpt.foreach(pf => {
897      // sms will train on all miss load sources
898      val source = loadUnits(i).io.prefetch_train
899      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
900        source.valid,
901        source.valid && source.bits.isFirstIssue && source.bits.miss
902      )
903      pf.io.ld_in(i).bits := source.bits
904      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
905      pf.io.ld_in(i).bits.uop.pc := Mux(
906        loadUnits(i).io.s2_ptr_chasing,
907        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
908        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
909      )
910    })
911    l1PrefetcherOpt.foreach(pf => {
912      // stream will train on all load sources
913      val source = loadUnits(i).io.prefetch_train_l1
914      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
915      pf.io.ld_in(i).bits := source.bits
916    })
917
918    // load to load fast forward: load(i) prefers data(i)
919    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
920    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
921    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
922    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
923    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
924    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
925    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
926    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
927    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
928    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
929    loadUnits(i).io.ld_fast_match := fastMatch
930    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
931    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
932    loadUnits(i).io.replay <> lsq.io.replay(i)
933
934    val l2_hint = RegNext(io.l2_hint)
935
936    // L2 Hint for DCache
937    dcache.io.l2_hint <> l2_hint
938
939    loadUnits(i).io.l2_hint <> l2_hint
940    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
941    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
942      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
943
944    // passdown to lsq (load s2)
945    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
946    if (i == UncacheWBPort) {
947      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
948    } else {
949      lsq.io.ldout(i).ready := true.B
950      loadUnits(i).io.lsq.uncache.valid := false.B
951      loadUnits(i).io.lsq.uncache.bits := DontCare
952    }
953    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
954    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
955    lsq.io.l2_hint.valid := l2_hint.valid
956    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
957    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
958
959    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
960
961    // connect misalignBuffer
962    loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf
963
964    if (i == MisalignWBPort) {
965      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
966      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
967    } else {
968      loadUnits(i).io.misalign_ldin.valid := false.B
969      loadUnits(i).io.misalign_ldin.bits := DontCare
970    }
971
972    // alter writeback exception info
973    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
974
975    // update mem dependency predictor
976    // io.memPredUpdate(i) := DontCare
977
978    // --------------------------------
979    // Load Triggers
980    // --------------------------------
981    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
982    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
983    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
984    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
985  }
986
987  for (i <- 0 until HyuCnt) {
988    hybridUnits(i).io.redirect <> redirect
989
990    // get input from dispatch
991    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
992    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
993    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
994    hybridUnits(i).io.correctMissTrain := correctMissTrain
995    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
996    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup
997
998    // ------------------------------------
999    //  Load Port
1000    // ------------------------------------
1001    // fast replay
1002    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out
1003
1004    // dcache access
1005    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
1006    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)
1007
1008    // forward
1009    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
1010    // forward
1011    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
1012    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
1013    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
1014    hybridUnits(i).io.ldu_io.vec_forward := DontCare
1015    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
1016    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
1017    // ld-ld violation check
1018    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
1019    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
1020    hybridUnits(i).io.csrCtrl <> csrCtrl
1021    // tlb hint
1022    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
1023    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
1024      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)
1025
1026    // dtlb
1027    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
1028    // pmp
1029    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
1030    // st-ld violation query
1031    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
1032    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
1033    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
1034    // load prefetch train
1035    prefetcherOpt.foreach(pf => {
1036      val source = hybridUnits(i).io.prefetch_train
1037      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
1038        source.valid,
1039        source.valid && source.bits.isFirstIssue && source.bits.miss
1040      )
1041      pf.io.ld_in(LduCnt + i).bits := source.bits
1042      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
1043    })
1044    l1PrefetcherOpt.foreach(pf => {
1045      // stream will train on all load sources
1046      val source = hybridUnits(i).io.prefetch_train_l1
1047      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
1048                                       FuType.isLoad(source.bits.uop.fuType)
1049      pf.io.ld_in(LduCnt + i).bits := source.bits
1050      pf.io.st_in(StaCnt + i).valid := false.B
1051      pf.io.st_in(StaCnt + i).bits := DontCare
1052    })
1053    prefetcherOpt.foreach(pf => {
1054      val source = hybridUnits(i).io.prefetch_train
1055      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
1056        source.valid,
1057        source.valid && source.bits.isFirstIssue && source.bits.miss
1058      ) && FuType.isStore(source.bits.uop.fuType)
1059      pf.io.st_in(StaCnt + i).bits := source.bits
1060      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
1061    })
1062
1063    // load to load fast forward: load(i) prefers data(i)
1064    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
1065    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
1066    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
1067    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
1068    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
1069    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
1070    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
1071    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
1072    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
1073    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
1074    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
1075    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
1076    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
1077    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
1078    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint
1079
1080    // uncache
1081    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
1082    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data
1083
1084
1085    // passdown to lsq (load s2)
1086    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
1087    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
1088    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
1089    // Lsq to sta unit
1090    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out
1091
1092    // Lsq to std unit's rs
1093    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
1094    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
1095    // prefetch
1096    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)
1097
1098    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err
1099
1100    // ------------------------------------
1101    //  Store Port
1102    // ------------------------------------
1103    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
1104    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)
1105
1106    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
1107    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
1108    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits
1109
1110    // ------------------------------------
1111    //  Vector Store Port
1112    // ------------------------------------
1113    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B
1114
1115    // -------------------------
1116    // Store Triggers
1117    // -------------------------
1118    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
1119    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1120    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1121    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
1122  }
1123
1124  // misalignBuffer
1125  loadMisalignBuffer.io.redirect                <> redirect
1126  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1127  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1128  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1129  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1130  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1131  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1132  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1133  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1134  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1135
1136  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
1137
1138  storeMisalignBuffer.io.redirect               <> redirect
1139  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
1140  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
1141  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
1142  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
1143  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
1144  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
1145  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
1146  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
1147  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext
1148
1149  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl
1150
1151  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
1152  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1153
1154  // Prefetcher
1155  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
1156  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
1157  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
1158  prefetcherOpt match {
1159    case Some(pf) =>
1160      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
1161      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
1162    case None =>
1163      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
1164      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
1165      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
1166  }
1167  l1PrefetcherOpt match {
1168    case Some(pf) =>
1169      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
1170      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
1171    case None =>
1172      dtlb_reqs(StreamDTLBPortIndex) := DontCare
1173      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
1174      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
1175  }
1176  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
1177  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
1178  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
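  // DTLB port map for the prefetch-related requestors above:
  //   StreamDTLBPortIndex     -> L1 stream prefetcher (l1PrefetcherOpt), on the load-TLB side
  //   PrefetcherDTLBPortIndex -> L2 prefetcher (prefetcherOpt, e.g. SMS)
  //   L2toL1DLBPortIndex      -> translation requests forwarded from L2 (io.l2_tlb_req)
  // Absent prefetchers have their ports tied off (req.valid := false.B, resp.ready := true.B).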
1179
1180  // StoreUnit
1181  for (i <- 0 until StdCnt) {
1182    stdExeUnits(i).io.flush <> redirect
1183    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1184    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1185    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1186  }
1187
1188  for (i <- 0 until StaCnt) {
1189    val stu = storeUnits(i)
1190
1191    stu.io.redirect      <> redirect
1192    stu.io.csrCtrl       <> csrCtrl
1193    stu.io.dcache        <> dcache.io.lsu.sta(i)
1194    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
1195    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
1196    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
1197    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
1198    // dtlb
1199    stu.io.tlb          <> dtlb_st.head.requestor(i)
1200    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp
1201
1202    // -------------------------
1203    // Store Triggers
1204    // -------------------------
1205    stu.io.fromCsrTrigger.tdataVec := tdata
1206    stu.io.fromCsrTrigger.tEnableVec := tEnable
1207    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1208    stu.io.fromCsrTrigger.debugMode := debugMode
1209
1210    // prefetch
1211    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)
1212
1213    // store unit does not need fast feedback
1214    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare
1215
1216    // Lsq to sta unit
1217    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out
1218
1219    // connect misalignBuffer
1220    storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf
1221
1222    if (i == 0) {
1223      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
1224      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
1225    } else {
1226      stu.io.misalign_stin.valid := false.B
1227      stu.io.misalign_stin.bits := DontCare
1228    }
1229
1230    // Lsq to std unit's rs
1231    if (i < VstuCnt) {
1232      when (vsSplit(i).io.vstd.get.valid) {
1233        lsq.io.std.storeDataIn(i).valid := true.B
1234        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
1235        stData(i).ready := false.B
1236      }.otherwise {
1237        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1238        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1239        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1240        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1241        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1242        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1243        stData(i).ready := true.B
1244      }
1245    } else {
1246      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1247      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1248      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1249      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1250      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1251      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1252      stData(i).ready := true.B
1253    }
1254    lsq.io.std.storeDataIn.foreach(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
1255    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)
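    // Note (a reading of the wiring above): for the first VstuCnt ports, vector store data from
    // vsSplit(i).io.vstd takes priority over scalar std data; in that cycle stData(i) is
    // back-pressured (ready := false.B) and accepted later.  Data flagged by st_data_atomics
    // never enters the store queue here and is instead routed to atomicsUnit.io.storeDataIn below.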
1256
1257
1258    // store prefetch train
1259    l1PrefetcherOpt.foreach(pf => {
1260      // stream will train on all load sources
1261      pf.io.st_in(i).valid := false.B
1262      pf.io.st_in(i).bits := DontCare
1263    })
1264
1265    prefetcherOpt.foreach(pf => {
1266      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
1267        stu.io.prefetch_train.valid,
1268        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
1269          stu.io.prefetch_train.bits.miss
1270          )
1271      )
1272      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
1273      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
1274    })
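    // Training policy (as wired above, not a spec): when pf_train_on_hit is set, every store
    // issued down this pipe trains prefetcherOpt (e.g. SMS); otherwise only first-issue stores
    // that miss in the DCache do.  The training PC is the issue-stage storePc delayed through
    // the s1/s2 prefetch-speculation enables so that it presumably lines up with the
    // prefetch_train payload.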
1275
1276    // 1. sync issue info to store set LFST
1277    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
1278    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
1279    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
1280    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
1281    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits
1282
1283    stu.io.stout.ready := true.B
1284
1285    // vector
1286    if (i < VstuCnt) {
1287      stu.io.vecstin <> vsSplit(i).io.out
1288      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
1289    } else {
1290      stu.io.vecstin.valid := false.B
1291      stu.io.vecstin.bits := DontCare
1292      stu.io.vecstout.ready := false.B
1293    }
1294    stu.io.vec_isFirstIssue := true.B // TODO
1295  }
1296
1297  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
1298  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
1299  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
1300  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")
1301
1302  // Store writeback by StoreQueue:
1303  //   1. cbo Zero
1304  //   2. mmio
1305  // Currently, the two should not be valid at the same time, so cbo zero is simply given higher priority.
1306  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
1307  NewPipelineConnect(
1308    sqOtherStout, otherStout, otherStout.fire,
1309    false.B,
1310    Option("otherStoutConnect")
1311  )
1312  otherStout.ready := false.B
1313  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
1314    stOut(0).valid := true.B
1315    stOut(0).bits  := otherStout.bits
1316    otherStout.ready := true.B
1317  }
1318  lsq.io.mmioStout.ready := sqOtherStout.ready
1319  lsq.io.cboZeroStout.ready := sqOtherStout.ready
1320
1321  // vec mmio writeback
1322  lsq.io.vecmmioStout.ready := false.B
1323
1324  // misalign buffer writeback will overwrite stOut(0)
1325  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
1326  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
1327  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
1328  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
1329  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
1330    stOut(0).valid := true.B
1331    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
1332  }
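  // stOut(0) write-back priority (as wired above, highest first):
  //   1. storeUnits(0).io.stout           (normal scalar store write-back)
  //   2. otherStout                       (mmio / cbo.zero write-back from the store queue)
  //   3. storeMisalignBuffer.io.writeBack (only when neither of the above is valid and the
  //      vector store write-back port is also idle)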
1333
1334  // Uncache
1335  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1336  uncache.io.hartId := io.hartId
1337  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1338
1339  // Lsq
1340  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
1341  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
1342  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1343  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1344  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1345  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1346  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1347  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1348  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1349  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1350  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1351
1352  //  lsq.io.rob            <> io.lsqio.rob
1353  lsq.io.enq            <> io.ooo_to_mem.enqLsq
1354  lsq.io.brqRedirect    <> redirect
1355
1356  //  violation rollback
1357  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
1358    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
1359    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
1360      (if (j < i) !xs(j).valid || compareVec(i)(j)
1361      else if (j == i) xs(i).valid
1362      else !xs(j).valid || !compareVec(j)(i))
1363    )).andR))
1364    resultOnehot
1365  }
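  // A minimal worked example (illustration only, assuming no robIdx wrap-around so that a
  // smaller robIdx is older): with three requesters whose (valid, robIdx) are (1, 12), (0, 3)
  // and (1, 7), selectOldestRedirect returns (0, 0, 1): entry 2 is the oldest valid one.
  // Ties in robIdx are broken towards the lower index, and the result is all-zero when no
  // entry is valid, so the Mux1H below never picks an invalid redirect.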
1366  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
1367  val oldestOneHot = selectOldestRedirect(allRedirect)
1368  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
1369  // memory replay would not cause IAF/IPF/IGPF
1370  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
1371  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
1372  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
1373  io.mem_to_ooo.memoryViolation := oldestRedirect
1374  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
1375  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1376
1377  // lsq.io.uncache        <> uncache.io.lsq
1378  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
1379  val uncacheState = RegInit(s_idle)
1380  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
1381  val uncacheIdResp = uncache.io.lsq.idResp
1382  val uncacheResp = Wire(Decoupled(new UncacheWordResp))
1383
1384  uncacheReq.bits := DontCare
1385  uncacheReq.valid := false.B
1386  uncacheReq.ready := false.B
1387  uncacheResp.bits := DontCare
1388  uncacheResp.valid := false.B
1389  uncacheResp.ready := false.B
1390  lsq.io.uncache.req.ready := false.B
1391  lsq.io.uncache.idResp.valid := false.B
1392  lsq.io.uncache.idResp.bits := DontCare
1393  lsq.io.uncache.resp.valid := false.B
1394  lsq.io.uncache.resp.bits := DontCare
1395
1396  switch (uncacheState) {
1397    is (s_idle) {
1398      when (uncacheReq.fire) {
1399        when (lsq.io.uncache.req.valid) {
1400          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1401            uncacheState := s_scalar_uncache
1402          }
1403        }.otherwise {
1404          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
1405          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1406            uncacheState := s_vector_uncache
1407          }
1408        }
1409      }
1410    }
1411
1412    is (s_scalar_uncache) {
1413      when (uncacheResp.fire) {
1414        uncacheState := s_idle
1415      }
1416    }
1417
1418    is (s_vector_uncache) {
1419      when (uncacheResp.fire) {
1420        uncacheState := s_idle
1421      }
1422    }
1423  }
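  // Uncache access FSM (see the switch above): a request leaves s_idle when uncacheReq fires.
  // Scalar requests from the LSQ go to s_scalar_uncache, unless they are non-cacheable (nc)
  // accesses with outstanding uncache writes enabled, in which case no state is held here and
  // responses are matched through idResp instead.  The .otherwise arm (s_vector_uncache)
  // appears unreachable in the current wiring, since uncacheReq is only driven by the LSQ and
  // the vsFlowQueue source is commented out.  Either busy state returns to s_idle once
  // uncacheResp fires.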
1424
1425  when (lsq.io.uncache.req.valid) {
1426    uncacheReq <> lsq.io.uncache.req
1427  }
1428  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1429    lsq.io.uncache.resp <> uncacheResp
1430    lsq.io.uncache.idResp <> uncacheIdResp
1431  }.otherwise {
1432    when (uncacheState === s_scalar_uncache) {
1433      lsq.io.uncache.resp <> uncacheResp
1434      lsq.io.uncache.idResp <> uncacheIdResp
1435    }
1436  }
1437  // add one pipeline stage on the uncache req/resp path for better timing
1438  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
1439  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1440
1441  //lsq.io.refill         := delayedDcacheRefill
1442  lsq.io.release        := dcache.io.lsu.release
1443  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
1444  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
1445  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
1446  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
1447  // Todo: assign these
1448  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
1449  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
1450  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel
1451
1452  // LSQ to store buffer
1453  lsq.io.sbuffer        <> sbuffer.io.in
1454  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
1455  sbuffer.io.in(0).bits  := Mux1H(Seq(
1456    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
1457    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
1458  ))
1459  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
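  // sbuffer enqueue port 0 is shared between the store queue and vSegmentUnit.  The Mux1H
  // above assumes the two are never valid in the same cycle (segment accesses presumably do
  // not overlap with the normal store-queue drain), so no explicit priority is needed.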
1460  lsq.io.sqEmpty        <> sbuffer.io.sqempty
1461  dcache.io.force_write := lsq.io.force_write
1462
1463  // Default initialization when difftest is disabled.
1464  sbuffer.io.vecDifftestInfo      := DontCare
1465  lsq.io.sbufferVecDifftestInfo   := DontCare
1466  vSegmentUnit.io.vecDifftestInfo := DontCare
1467  if (env.EnableDifftest) {
1468    sbuffer.io.vecDifftestInfo.zipWithIndex.foreach { case (sbufferPort, index) =>
1469      if (index == 0) {
1470        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
1471        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
1472        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)
1473
1474        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
1475        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
1476      } else {
1477         sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
1478      }
1479    }
1480  }
1481
1482  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
1483  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
1484  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
1485  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits
1486
1487  // vector
1488  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
1489    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1490  )
1491  val vStoreCanAccept = (0 until VstuCnt).map(i =>
1492    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1493  )
1494  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
1495  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
1496    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
1497  }
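  // isFixVlUop marks the extra last uop of a vleff (fault-only-first) instruction whose only
  // job is to fix vl; it is kept out of vlSplit below and is presumably completed through
  // vfofBuffer instead.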
1498
1499  // init port
1500  /**
1501   * TODO: the split vsMergeBuffer may be removed if one RS can accept two feedbacks, or if the RS does not need to replay uops
1502   * for now:
1503   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
1504   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
1505   *
1506   * vector loads do not need feedback
1507   *
1508   *  RS0 -> VlSplit0  -> ldu0 -> |
1509   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
1510   *        replayIO   -> ldu3 -> |
1511   */
1512  (0 until VstuCnt).foreach{i =>
1513    vsMergeBuffer(i).io.fromPipeline := DontCare
1514    vsMergeBuffer(i).io.fromSplit := DontCare
1515
1516    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
1517    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
1518  }
1519
1520  (0 until VstuCnt).foreach{i =>
1521    vsSplit(i).io.redirect <> redirect
1522    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1523    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1524                              vStoreCanAccept(i) && !isSegment
1525    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
1526    NewPipelineConnect(
1527      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
1528      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
1529      Option("VsSplitConnectStu")
1530    )
1531    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data
1532
1533    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
1534    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
1535
1536  }
1537  (0 until VlduCnt).foreach{i =>
1538    vlSplit(i).io.redirect <> redirect
1539    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1540    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1541                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
1542    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
1543    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
1544    vlSplit(i).io.threshold.get.bits  := lsq.io.lqDeqPtr
1545    NewPipelineConnect(
1546      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
1547      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
1548      Option("VlSplitConnectLdu")
1549    )
1550
1551    // Subsequent instructions will be blocked
1552    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
1553    vfofBuffer.io.in(i).bits  := io.ooo_to_mem.issueVldu(i).bits
1554  }
1555  (0 until LduCnt).foreach{i=>
1556    loadUnits(i).io.vecldout.ready         := vlMergeBuffer.io.fromPipeline(i).ready
1557    loadMisalignBuffer.io.vecWriteBack.ready := true.B
1558
1559    if (i == MisalignWBPort) {
1560      when(loadUnits(i).io.vecldout.valid) {
1561        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1562        vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1563      } .otherwise {
1564        vlMergeBuffer.io.fromPipeline(i).valid   := loadMisalignBuffer.io.vecWriteBack.valid
1565        vlMergeBuffer.io.fromPipeline(i).bits    := loadMisalignBuffer.io.vecWriteBack.bits
1566      }
1567    } else {
1568      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1569      vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1570    }
1571  }
1572
1573  (0 until StaCnt).foreach{i=>
1574    if(i < VstuCnt){
1575      storeUnits(i).io.vecstout.ready := true.B
1576      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready
1577
1578      when(storeUnits(i).io.vecstout.valid) {
1579        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
1580        vsMergeBuffer(i).io.fromPipeline.head.bits  := storeUnits(i).io.vecstout.bits
1581      } .otherwise {
1582        vsMergeBuffer(i).io.fromPipeline.head.valid   := storeMisalignBuffer.io.vecWriteBack(i).valid
1583        vsMergeBuffer(i).io.fromPipeline.head.bits    := storeMisalignBuffer.io.vecWriteBack(i).bits
1584      }
1585    }
1586  }
1587
1588  (0 until VlduCnt).foreach{i=>
1589    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
1590  }
1591
1592  vlMergeBuffer.io.redirect <> redirect
1593  vsMergeBuffer.map(_.io.redirect <> redirect)
1594  (0 until VlduCnt).foreach{i=>
1595    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
1596  }
1597  (0 until VstuCnt).foreach{i=>
1598    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
1599  }
1600
1601  (0 until VlduCnt).foreach{i=>
1602    // send to RS
1603    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
1604    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
1605  }
1606  (0 until VstuCnt).foreach{i =>
1607    // send to RS
1608    if (i == 0){
1609      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
1610      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
1611        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
1612        vsMergeBuffer(i).io.feedback.head.valid ->  vsMergeBuffer(i).io.feedback.head.bits
1613      ))
1614      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1615    } else {
1616      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
1617      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1618    }
1619  }
1620
1621  (0 until VlduCnt).foreach{i=>
1622    if (i == 0) { // vSegmentUnit uses writeback port 0
1623      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
1624      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1625        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
1626        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1627        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1628      ))
1629      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
1630      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
1631      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1632    } else if (i == 1) {
1633      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
1634      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1635        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
1636        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1637        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1638      ))
1639      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
1640      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
1641      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1642    } else {
1643      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
1644      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1645        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
1646        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1647      ))
1648      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
1649      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
1650    }
1651
1652    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
1653    vfofBuffer.io.mergeUopWriteback(i).bits  := vlMergeBuffer.io.uopWriteback(i).bits
1654  }
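  // Vector write-back port arbitration (PriorityMux order above, highest priority first):
  //   port 0: vSegmentUnit > vlMergeBuffer > vsMergeBuffer
  //   port 1: vfofBuffer   > vlMergeBuffer > vsMergeBuffer
  //   others: vlMergeBuffer > vsMergeBuffer
  // The ready signals are gated accordingly, so a lower-priority source only fires when every
  // higher-priority source is idle.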
1655
1656
1657  vfofBuffer.io.redirect <> redirect
1658
1659  // Sbuffer
1660  sbuffer.io.csrCtrl    <> csrCtrl
1661  sbuffer.io.dcache     <> dcache.io.lsu.store
1662  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
1663  sbuffer.io.force_write <> lsq.io.force_write
1664  // flush sbuffer
1665  val cmoFlush = lsq.io.flushSbuffer.valid
1666  val fenceFlush = io.ooo_to_mem.flushSb
1667  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
1668  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
1669  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)
1670
1671  // if fence, atomics and cmo all try to flush the sbuffer at the same time,
1672  // something must have gone wrong
1673  assert(!(fenceFlush && atomicsFlush && cmoFlush))
1674  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
1675  uncache.io.flush.valid := sbuffer.io.flush.valid
1676
1677  // AtomicsUnit: AtomicsUnit will override other control signals,
1678  // as atomics insts (LR/SC/AMO) will block the pipeline
1679  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
1680  val state = RegInit(s_normal)
1681
1682  val st_atomics = Seq.tabulate(StaCnt)(i =>
1683    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
1684  ) ++ Seq.tabulate(HyuCnt)(i =>
1685    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
1686  )
1687
1688  for (i <- 0 until StaCnt) when(st_atomics(i)) {
1689    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
1690    storeUnits(i).io.stin.valid := false.B
1691
1692    state := s_atomics(i)
1693  }
1694  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
1695    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
1696    hybridUnits(i).io.lsin.valid := false.B
1697
1698    state := s_atomics(StaCnt + i)
1699    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
1700  }
1701  when (atomicsUnit.io.out.valid) {
1702    state := s_normal
1703  }
1704
1705  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
1706  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
1707    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
1708    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
1709  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
1710    stdin.valid := st_data_atomics(i)
1711    stdin.bits := stData(i).bits
1712  }
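  // The design assumes at most one issue port carries an AMO in a given cycle (cf. the
  // assertion in the hybrid-port loop above), so a Mux1H is sufficient to steer the chosen
  // STA/HYU payload into atomicsUnit.io.in, while the corresponding std port delivers the AMO
  // data through atomicsUnit.io.storeDataIn.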
1713  atomicsUnit.io.redirect <> redirect
1714
1715  // TODO: complete amo's pmp support
1716  val amoTlb = dtlb_ld(0).requestor(0)
1717  atomicsUnit.io.dtlb.resp.valid := false.B
1718  atomicsUnit.io.dtlb.resp.bits  := DontCare
1719  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
1720  atomicsUnit.io.pmpResp := pmp_check(0).resp
1721
1722  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
1723  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty
1724
1725  atomicsUnit.io.csrCtrl := csrCtrl
1726
1727  // for atomicsUnit, it uses loadUnit(0)'s TLB port
1728
1729  when (state =/= s_normal) {
1730    // use store wb port instead of load
1731    loadUnits(0).io.ldout.ready := false.B
1732    // use load_0's TLB
1733    atomicsUnit.io.dtlb <> amoTlb
1734
1735    // hw prefetch should be disabled while executing atomic insts
1736    loadUnits.map(i => i.io.prefetch_req.valid := false.B)
1737
1738    // make sure there are no in-flight uops in the load unit
1739    assert(!loadUnits(0).io.ldout.valid)
1740  }
1741
1742  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty
1743
1744  for (i <- 0 until StaCnt) {
1745    when (state === s_atomics(i)) {
1746      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1747      assert(!storeUnits(i).io.feedback_slow.valid)
1748    }
1749  }
1750  for (i <- 0 until HyuCnt) {
1751    when (state === s_atomics(StaCnt + i)) {
1752      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1753      assert(!hybridUnits(i).io.feedback_slow.valid)
1754    }
1755  }
1756
1757  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
1758  // The exception address is used several cycles after the flush.
1759  // We delay it by 10 cycles to ensure it stays valid across the flush.
1760  val atomicsException = RegInit(false.B)
1761  when (DelayN(redirect.valid, 10) && atomicsException) {
1762    atomicsException := false.B
1763  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
1764    atomicsException := true.B
1765  }
1766
1767  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
1768  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1769    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
1770    storeMisalignBuffer.io.overwriteExpBuf.vaddr
1771  )
1772  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1773    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
1774    storeMisalignBuffer.io.overwriteExpBuf.isHyper
1775  )
1776  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1777    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
1778    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
1779  )
1780  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1781    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
1782    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
1783  )
1784
1785  val vSegmentException = RegInit(false.B)
1786  when (DelayN(redirect.valid, 10) && vSegmentException) {
1787    vSegmentException := false.B
1788  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
1789    vSegmentException := true.B
1790  }
1791  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
1792  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
1793  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
1794  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
1795  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
1796  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
1797  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
1798  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)
1799
1800  val exceptionVaddr = Mux(
1801    atomicsException,
1802    atomicsExceptionAddress,
1803    Mux(misalignBufExceptionOverwrite,
1804      misalignBufExceptionVaddr,
1805      Mux(vSegmentException,
1806        vSegmentExceptionAddress,
1807        lsq.io.exceptionAddr.vaddr
1808      )
1809    )
1810  )
1811  // whether the vaddr needs extension and whether the inst is a hypervisor access:
1812  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
1813  // IsHyper: atomicsException -> false; vSegmentException -> false
1814  val exceptionVaNeedExt = !atomicsException &&
1815    (misalignBufExceptionOverwrite ||
1816      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
1817  val exceptionIsHyper = !atomicsException &&
1818    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
1819      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1820
1821  def GenExceptionVa(mode: UInt, isVirt: Bool, vaNeedExt: Bool,
1822                     satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
1823                     vaddr: UInt) = {
1824    require(VAddrBits >= 50)
1825
1826    val Sv39 = satp.mode === 8.U
1827    val Sv48 = satp.mode === 9.U
1828    val Sv39x4 = vsatp.mode === 8.U || hgatp.mode === 8.U
1829    val Sv48x4 = vsatp.mode === 9.U || hgatp.mode === 9.U
1830    val vmEnable = !isVirt && (Sv39 || Sv48) && (mode < CSRConst.ModeM)
1831    val s2xlateEnable = isVirt && (Sv39x4 || Sv48x4) && (mode < CSRConst.ModeM)
1832
1833    val s2xlate = MuxCase(noS2xlate, Seq(
1834      !isVirt                                    -> noS2xlate,
1835      (vsatp.mode =/= 0.U && hgatp.mode =/= 0.U) -> allStage,
1836      (vsatp.mode === 0.U)                       -> onlyStage2,
1837      (hgatp.mode === 0.U)                       -> onlyStage1
1838    ))
1839    val onlyS2 = s2xlate === onlyStage2
1840
1841    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
1842    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
1843    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
1844    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
1845    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)
1846
1847    val ExceptionVa = Wire(UInt(XLEN.W))
1848    when (vaNeedExt) {
1849      ExceptionVa := Mux1H(Seq(
1850        (!(vmEnable || s2xlateEnable)) -> bareAddr,
1851        (!onlyS2 && (Sv39 || Sv39x4))  -> sv39Addr,
1852        (!onlyS2 && (Sv48 || Sv48x4))  -> sv48Addr,
1853        ( onlyS2 && (Sv39 || Sv39x4))  -> sv39x4Addr,
1854        ( onlyS2 && (Sv48 || Sv48x4))  -> sv48x4Addr,
1855      ))
1856    } .otherwise {
1857      ExceptionVa := vaddr
1858    }
1859
1860    ExceptionVa
1861  }
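  // A minimal worked example (illustration only): with vaNeedExt set, no virtualisation, a
  // privilege mode below M and satp.mode == 8 (Sv39), the reported address is
  // SignExt(vaddr(38, 0), XLEN); with a stage-2-only translation under Sv39x4 it becomes
  // ZeroExt(vaddr(40, 0), XLEN); and in bare mode (no translation enabled) the
  // physical-address-sized value is zero-extended instead.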
1862
1863  io.mem_to_ooo.lsqio.vaddr := RegNext(
1864    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
1865    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
1866  )
1867
1868  // vsegment instructions are executed atomically, which means atomicsException and vSegmentException should not be raised at the same time.
1869  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
1870  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
1871                                            vSegmentExceptionVstart,
1872                                            lsq.io.exceptionAddr.vstart)
1873  )
1874  io.mem_to_ooo.lsqio.vl     := RegNext(Mux(vSegmentException,
1875                                            vSegmentExceptionVl,
1876                                            lsq.io.exceptionAddr.vl)
1877  )
1878
1879  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
1880  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
1881    atomicsException,
1882    atomicsExceptionGPAddress,
1883    Mux(misalignBufExceptionOverwrite,
1884      misalignBufExceptionGpaddr,
1885      Mux(vSegmentException,
1886        vSegmentExceptionGPAddress,
1887        lsq.io.exceptionAddr.gpaddr
1888      )
1889    )
1890  ))
1891  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
1892    atomicsException,
1893    atomicsExceptionIsForVSnonLeafPTE,
1894    Mux(misalignBufExceptionOverwrite,
1895      misalignBufExceptionIsForVSnonLeafPTE,
1896      Mux(vSegmentException,
1897        vSegmentExceptionIsForVSnonLeafPTE,
1898        lsq.io.exceptionAddr.isForVSnonLeafPTE
1899      )
1900    )
1901  ))
1902  io.mem_to_ooo.topToBackendBypass match { case x =>
1903    x.hartId            := io.hartId
1904    x.l2FlushDone       := RegNext(io.l2_flush_done)
1905    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
1906    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
1907    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
1908    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
1909    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
1910    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0)
1911    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
1912    x.msiInfo           := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
1913    x.clintTime         := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
1914  }
1915
1916  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
1917  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
1918  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
1919
1920  io.inner_hartId := io.hartId
1921  io.inner_reset_vector := RegNext(io.outer_reset_vector)
1922  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
1923  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
1924  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
1925  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
1926  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
1927  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)
1928
1929  // vector segmentUnit
1930  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
1931  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid // is segment instruction
1932  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
1933  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
1934  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
1935  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
1936  vSegmentUnit.io.redirect <> redirect
1937  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
1938  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
1939  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
1940  // -------------------------
1941  // Vector Segment Triggers
1942  // -------------------------
1943  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
1944  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
1945  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1946  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode
1947
1948  // reset tree of MemBlock
1949  if (p(DebugOptionsKey).ResetGen) {
1950    val leftResetTree = ResetGenNode(
1951      Seq(
1952        ModuleNode(ptw),
1953        ModuleNode(ptw_to_l2_buffer),
1954        ModuleNode(lsq),
1955        ModuleNode(dtlb_st_tlb_st),
1956        ModuleNode(dtlb_prefetch_tlb_prefetch),
1957        ModuleNode(pmp)
1958      )
1959      ++ pmp_checkers.map(ModuleNode(_))
1960      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
1961      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
1962    )
1963    val rightResetTree = ResetGenNode(
1964      Seq(
1965        ModuleNode(sbuffer),
1966        ModuleNode(dtlb_ld_tlb_ld),
1967        ModuleNode(dcache),
1968        ModuleNode(l1d_to_l2_buffer),
1969        CellNode(io.reset_backend)
1970      )
1971    )
1972    ResetGen(leftResetTree, reset, sim = false)
1973    ResetGen(rightResetTree, reset, sim = false)
1974  } else {
1975    io.reset_backend := DontCare
1976  }
1977  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
1978  // trace interface
1979  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
1980  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
1981  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
1982  traceToL2Top.toEncoder.trap  := RegEnable(
1983    traceFromBackend.toEncoder.trap,
1984    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
1985  )
1986  traceToL2Top.toEncoder.priv := RegEnable(
1987    traceFromBackend.toEncoder.priv,
1988    traceFromBackend.toEncoder.groups(0).valid
1989  )
1990  (0 until TraceGroupNum).foreach { i =>
1991    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
1992    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
1993    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
1994    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
1995      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
1996      traceFromBackend.toEncoder.groups(i).valid
1997    )
1998    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
1999      traceFromBackend.toEncoder.groups(i).bits.iaddr,
2000      traceFromBackend.toEncoder.groups(i).valid
2001    ) + (RegEnable(
2002      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
2003      traceFromBackend.toEncoder.groups(i).valid
2004    ) << instOffsetBits)
2005  }
2006
2007
2008  io.mem_to_ooo.storeDebugInfo := DontCare
2009  // store event difftest information
2010  if (env.EnableDifftest) {
2011    (0 until EnsbufferWidth).foreach{i =>
2012        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
2013        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
2014    }
2015  }
2016
2017  // top-down info
2018  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2019  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2020  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2021  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
2022  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
2023  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
2024  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
2025  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
2026  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
2027  dcache.io.debugRolling := io.debugRolling
2028
2029  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
2030  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
2031  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
2032  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
2033  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
2034  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
2035
2036  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
2037  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
2038  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
2039  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
2040  val iqDeqCount = ldDeqCount +& stDeqCount
2041  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
2042  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
2043  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
2044  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
2045  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2046
2047  val pfevent = Module(new PFEvent)
2048  pfevent.io.distribute_csr := csrCtrl.distribute_csr
2049  val csrevents = pfevent.io.hpmevent.slice(16,24)
2050
2051  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
2052  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
2053  val perfBlock     = Seq(("ldDeqCount", ldDeqCount),
2054                          ("stDeqCount", stDeqCount))
2055  // let index = 0 be no event
2056  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock
2057
2058  if (printEventCoding) {
2059    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
2060      println("MemBlock perfEvents Set", name, inc, i)
2061    }
2062  }
2063
2064  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
2065  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
2066  generatePerfEvent()
2067}
2068
2069class MemBlock()(implicit p: Parameters) extends LazyModule
2070  with HasXSParameter {
2071  override def shouldBeInlined: Boolean = false
2072
2073  val inner = LazyModule(new MemBlockInlined())
2074
2075  lazy val module = new MemBlockImp(this)
2076}
2077
2078class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
2079  val io = IO(wrapper.inner.module.io.cloneType)
2080  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
2081  io <> wrapper.inner.module.io
2082  io_perf <> wrapper.inner.module.io_perf
2083
2084  if (p(DebugOptionsKey).ResetGen) {
2085    ResetGen(ResetGenNode(Seq(ModuleNode(wrapper.inner.module))), reset, sim = false)
2086  }
2087}
2088