xref: /XiangShan/src/main/scala/xiangshan/XSCore.scala (revision dc597826530cb6803c2396d6ab0e5eb176b732e0)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan
18
19import chisel3._
20import chisel3.util._
21import xiangshan.backend._
22import xiangshan.backend.fu.HasExceptionNO
23import xiangshan.backend.exu.{ExuConfig, WbArbiter}
24import xiangshan.frontend._
25import xiangshan.cache.mmu._
26import xiangshan.cache.L1plusCacheWrapper
27import chipsalliance.rocketchip.config
28import chipsalliance.rocketchip.config.Parameters
29import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
30import freechips.rocketchip.tile.HasFPUParameters
31import system.{HasSoCParameter, L1CacheErrorInfo, SoCParamsKey}
32import utils._
33
/** Common base class for XiangShan hardware modules.
  *
  * It is a `MultiIOModule` that keeps the implicit `Parameters` as the member `p`
  * and mixes in `HasXSParameter`, `HasExceptionNO` and `HasFPUParameters`.
  */
abstract class XSModule(implicit val p: Parameters)
  extends MultiIOModule with HasXSParameter with HasExceptionNO with HasFPUParameters {

  /** The module's IO record; every concrete subclass has to define it. */
  def io: Record
}
40
/** Temporary mixin for modules whose logic has not been written yet.
  *
  * It overrides `IO` so that every port created through it is connected to `DontCare`,
  * and it prints a warning each time a port is created.
  * Remove this trait from a module once its logic is implemented.
  */
trait NeedImpl {
  this: RawModule =>
  override protected def IO[T <: Data](iodef: T): T = {
    println(s"[Warn]: (${this.name}) please remove 'NeedImpl' after implementing this module")
    val io = chisel3.experimental.IO(iodef)
    // Tie the whole port off; real connections are expected once the module is implemented.
    io <> DontCare
    io
  }
}
51
/** Common base class for XiangShan bundles: a `Bundle` that keeps the implicit
  * `Parameters` as the member `p` and mixes in `HasXSParameter`.
  */
abstract class XSBundle(implicit val p: Parameters) extends Bundle with HasXSParameter
54
/** Environment-level switches grouped into one immutable value.
  *
  * NOTE(review): the exact effect of each flag is defined by its users, which are not
  * part of this definition; only the defaults are stated here.
  *
  * @param FPGAPlatform    defaults to `true`
  * @param EnableDebug     defaults to `false`
  * @param EnablePerfDebug defaults to `true`
  * @param DualCore        defaults to `false`
  */
case class EnviromentParameters(
  FPGAPlatform: Boolean = true,
  EnableDebug: Boolean = false,
  EnablePerfDebug: Boolean = true,
  DualCore: Boolean = false
)
62
/** Diplomacy (`LazyModule`) layer of a XiangShan core.
  *
  * Creates the lazy sub-modules of the core (frontend, L1+ cache wrapper, PTW wrapper,
  * the two write-back arbiters, the execution blocks, the memory scheduler and the memory
  * block) and computes the static tables that describe how execution units are grouped
  * into schedulers, which dispatch ports feed them, and which write-back ports act as
  * fast wake-up sources for them.
  *
  * The order of the `val`s is the order in which the lazy modules are created; keep it.
  */
abstract class XSCoreBase()(implicit p: config.Parameters) extends LazyModule
  with HasXSParameter with HasExuWbMappingHelper
{
  // outer facing nodes
  val frontend = LazyModule(new Frontend())
  val l1pluscache = LazyModule(new L1plusCacheWrapper())
  val ptw = LazyModule(new PTWWrapper())

  // Integer write-back: the units that write the integer register file, their arbiter,
  // and, for every arbiter connection, the configs of the units attached to it.
  val intConfigs = exuConfigs.filter(_.writeIntRf)
  val intArbiter = LazyModule(new WbArbiter(intConfigs, NRIntWritePorts, isFp = false))
  val intWbPorts = intArbiter.allConnections.map(c => c.map(intConfigs(_)))
  val numIntWbPorts = intWbPorts.length

  // Floating-point write-back, built the same way as the integer side.
  val fpConfigs = exuConfigs.filter(_.writeFpRf)
  val fpArbiter = LazyModule(new WbArbiter(fpConfigs, NRFpWritePorts, isFp = true))
  val fpWbPorts = fpArbiter.allConnections.map(c => c.map(fpConfigs(_)))
  val numFpWbPorts = fpWbPorts.length

  // TODO: better RS organization
  // generate rs according to number of function units
  // The tables below are only valid for the unit counts checked here.
  require(exuParameters.JmpCnt == 1)
  require(exuParameters.MduCnt <= exuParameters.AluCnt && exuParameters.MduCnt > 0)
  require(exuParameters.FmiscCnt <= exuParameters.FmacCnt && exuParameters.FmiscCnt > 0)
  require(exuParameters.LduCnt == 2 && exuParameters.StuCnt == 2)

  // one RS every 2 MDUs
  // One inner Seq per scheduler. The first three entries become ExuBlocks, the last one
  // becomes memScheduler (see exuBlocks / memScheduler below).
  val schedulePorts = Seq(
    // exuCfg, numDeq, intFastWakeupTarget, fpFastWakeupTarget
    Seq(
      (AluExeUnitCfg, exuParameters.AluCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg, JumpCSRExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq())
    ),
    Seq(
      (MulDivExeUnitCfg, exuParameters.MduCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg, JumpCSRExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq()),
      (JumpCSRExeUnitCfg, 1, Seq(), Seq()),
      (StdExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    ),
    Seq(
      (FmacExeUnitCfg, exuParameters.FmacCnt, Seq(), Seq(FmacExeUnitCfg, FmiscExeUnitCfg)),
      (FmiscExeUnitCfg, exuParameters.FmiscCnt, Seq(), Seq())
    ),
    Seq(
      (LdExeUnitCfg, exuParameters.LduCnt, Seq(AluExeUnitCfg, LdExeUnitCfg), Seq()),
      (StaExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    )
  )

  // should do outer fast wakeup ports here
  // For every scheduler and every unit config in it: the sorted indices returned by
  // getFastWakeupIndex for the write-back ports whose units list that config as a
  // fast wake-up target.
  val otherFastPorts = schedulePorts.zipWithIndex.map { case (sche, i) =>
    // entries of all schedulers except scheduler i
    val otherCfg = schedulePorts.zipWithIndex.filter(_._2 != i).flatMap(_._1)
    // entries of this scheduler whose config has wakeupFromExu set (independent of cfg)
    val outerWakeupInSche = sche.filter(_._1.wakeupFromExu)
    val outerPorts = sche.map(cfg => {
      // exe units from this scheduler need fastUops from exeunits
      val intraIntScheOuter = outerWakeupInSche.filter(_._3.contains(cfg._1)).map(_._1)
      val intraFpScheOuter = outerWakeupInSche.filter(_._4.contains(cfg._1)).map(_._1)
      // exe units from other schedulers need fastUop from outside
      val otherIntSource = otherCfg.filter(_._3.contains(cfg._1)).map(_._1)
      val otherFpSource = otherCfg.filter(_._4.contains(cfg._1)).map(_._1)
      val intSource = findInWbPorts(intWbPorts, intraIntScheOuter ++ otherIntSource)
      val fpSource = findInWbPorts(fpWbPorts, intraFpScheOuter ++ otherFpSource)
      getFastWakeupIndex(cfg._1, intSource, fpSource, numIntWbPorts).sorted
    })
    println(s"inter-scheduler wakeup sources for $i: $outerPorts")
    outerPorts
  }

  // allow mdu and fmisc to have 2*numDeq enqueue ports
  // Each dispatch port is described by a Seq of (Int, Int) pairs.
  // NOTE(review): presumably (index of the entry inside the scheduler's schedulePorts
  // element, enqueue port of that entry) - confirm against Scheduler/ExuBlock.
  val intDpPorts = (0 until exuParameters.AluCnt).map(i => Seq((0, i)))
  val int1DpPorts = (0 until exuParameters.MduCnt).map(i => {
    if (i < exuParameters.JmpCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  }) ++ (0 until exuParameters.StuCnt).map(i => Seq((2, i)))
  // Ports at or beyond 2*FmiscCnt fall back to entry 0 only, mirroring the fallback of
  // int1DpPorts. The fallback used to be (1, i), i.e. entry 1 with an index that the
  // condition had just excluded for it. Not reachable when FmacCnt <= 2*FmiscCnt.
  val fpDpPorts = (0 until exuParameters.FmacCnt).map(i => {
    if (i < 2*exuParameters.FmiscCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })
  val lsDpPorts = Seq(
    Seq((0, 0)),
    Seq((0, 1)),
    Seq((1, 0)),
    Seq((1, 1))
  )
  // Same order as schedulePorts.
  val dispatchPorts = Seq(intDpPorts, int1DpPorts, fpDpPorts, lsDpPorts)

  // One value per scheduler, passed as the last constructor argument below.
  val outFpRfReadPorts = Seq(0, 0, 2, 0)
  // Every scheduler except the last one gets its own ExuBlock, created in table order.
  val exuBlocks = schedulePorts.zip(dispatchPorts).zip(otherFastPorts).zip(outFpRfReadPorts).init.map { case (((sche, disp), other), ofp) =>
    LazyModule(new ExuBlock(sche, disp, intWbPorts, fpWbPorts, other, ofp))
  }

  // The last scheduler entry is instantiated as a bare Scheduler next to MemBlock.
  val memScheduler = LazyModule(new Scheduler(schedulePorts.last, dispatchPorts.last, intWbPorts, fpWbPorts, otherFastPorts.last, outFpRfReadPorts.last))
  // MemBlock sees IssQueSize overridden with the largest value in memScheduler.memRsEntries.
  val memBlock = LazyModule(new MemBlock()(p.alter((site, here, up) => {
    case XSCoreParamsKey => up(XSCoreParamsKey).copy(
      IssQueSize = memScheduler.memRsEntries.max
    )
  })))
}
158
/** Concrete core: `XSCoreBase` combined with `HasXSDts`, using `XSCoreImp` as its
  * lazily created hardware implementation.
  */
class XSCore()(implicit p: config.Parameters) extends XSCoreBase with HasXSDts {
  lazy val module = new XSCoreImp(this)
}
164
/** Hardware implementation of a XiangShan core.
  *
  * Instantiates the control block, picks up the module implementations of the lazy
  * sub-modules created in `outer`, and wires them together: write-back arbitration,
  * the frontend/back-end interfaces, dispatch into the schedulers, CSR and fence
  * connections, the memory block, the TLB repeater/PTW path and the reset generators.
  *
  * Statement order matters: several ports are first connected in bulk or given a default
  * and are then driven again by later statements.
  *
  * @param outer the lazy-module wrapper whose sub-modules are wired up here
  */
class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasSoCParameter
  with HasExeBlockHelper {
  val io = IO(new Bundle {
    val hartId = Input(UInt(64.W))
    val externalInterrupt = new ExternalInterruptIO
    val l2_pf_enable = Output(Bool())
    val l1plus_error, icache_error, dcache_error = Output(new L1CacheErrorInfo)
  })

  println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}")
  AddressSpace.checkMemmap()
  AddressSpace.printMemmap()

  val ctrlBlock = Module(new CtrlBlock)

  val frontend = outer.frontend.module
  val memBlock = outer.memBlock.module
  val l1pluscache = outer.l1pluscache.module
  val ptw = outer.ptw.module
  val exuBlocks = outer.exuBlocks.map(_.module)
  val memScheduler = outer.memScheduler.module

  // All write-back ports: those of the execution blocks first, then the memory block's.
  val allWriteback = exuBlocks.map(_.io.fuWriteback).fold(Seq())(_ ++ _) ++ memBlock.io.writeback

  // The zips below pair ports with configs by position and would silently truncate on a
  // length mismatch, so check the lengths before the first zip.
  require(exuConfigs.length == allWriteback.length,
    s"exuConfigs (${exuConfigs.length}) and write-back ports (${allWriteback.length}) must have the same length")
  val intWriteback = allWriteback.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  // set default value for ready
  exuBlocks.foreach(_.io.fuWriteback.foreach(_.ready := true.B))
  memBlock.io.writeback.foreach(_.ready := true.B)

  // Integer arbiter: a port takes part only when its uop does not write the FP register
  // file; only then does the arbiter's ready replace the default ready above.
  val intArbiter = outer.intArbiter.module
  intArbiter.io.in.zip(intWriteback).foreach { case (arb, wb) =>
    arb.valid := wb.valid && !wb.bits.uop.ctrl.fpWen
    arb.bits := wb.bits
    when (arb.valid) {
      wb.ready := arb.ready
    }
  }

  // Floating-point arbiter: same scheme, for uops with fpWen set.
  val fpArbiter = outer.fpArbiter.module
  val fpWriteback = allWriteback.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  fpArbiter.io.in.zip(fpWriteback).foreach{ case (arb, wb) =>
    arb.valid := wb.valid && wb.bits.uop.ctrl.fpWen
    arb.bits := wb.bits
    when (arb.valid) {
      wb.ready := arb.ready
    }
  }

  // Arbitrated write-back: integer outputs followed by floating-point outputs.
  val rfWriteback = VecInit(intArbiter.io.out ++ fpArbiter.io.out)

  io.l1plus_error <> l1pluscache.io.error
  io.icache_error <> frontend.io.error
  io.dcache_error <> memBlock.io.error

  // Exactly one execution block must contain the Jump/CSR unit; its CSR and fence
  // interfaces are used throughout the rest of this module.
  require(exuBlocks.count(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)) == 1)
  val csrFenceMod = exuBlocks.filter(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)).head
  val csrioIn = csrFenceMod.io.fuExtra.csrio.get
  val fenceio = csrFenceMod.io.fuExtra.fenceio.get

  frontend.io.backend <> ctrlBlock.io.frontend
  frontend.io.sfence <> fenceio.sfence
  frontend.io.tlbCsr <> csrioIn.tlb
  frontend.io.csrCtrl <> csrioIn.customCtrl

  frontend.io.icacheMemAcq <> l1pluscache.io.req
  l1pluscache.io.resp <> frontend.io.icacheMemGrant
  l1pluscache.io.flush := frontend.io.l1plusFlush
  frontend.io.fencei := fenceio.fencei

  ctrlBlock.io.csrCtrl <> csrioIn.customCtrl
  // Blocks that contain at least one unit with hasRedirect, taken in reverse block order.
  val redirectBlocks = exuBlocks.reverse.filter(_.fuConfigs.exists(_._1.hasRedirect))
  ctrlBlock.io.exuRedirect <> redirectBlocks.map(_.io.fuExtra.exuRedirect).fold(Seq())(_ ++ _)
  ctrlBlock.io.stIn <> memBlock.io.stIn
  ctrlBlock.io.stOut <> memBlock.io.stOut
  ctrlBlock.io.memoryViolation <> memBlock.io.memoryViolation
  ctrlBlock.io.enqLsq <> memBlock.io.enqLsq
  ctrlBlock.io.writeback <> rfWriteback

  // Fast wake-up uops, filtered like the write-back ports; for every arbiter connection
  // the uop of its first member (c.head) is used.
  val allFastUop = exuBlocks.map(_.io.fastUopOut).fold(Seq())(_ ++ _) ++ memBlock.io.otherFastWakeup
  val intFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  val fpFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  val intFastUop1 = outer.intArbiter.allConnections.map(c => intFastUop(c.head))
  val fpFastUop1 = outer.fpArbiter.allConnections.map(c => fpFastUop(c.head))
  val allFastUop1 = intFastUop1 ++ fpFastUop1

  // Bulk connection of the dispatch ports; the loops and the store handling below drive
  // some of these ports again.
  ctrlBlock.io.enqIQ <> exuBlocks(0).io.allocate ++ exuBlocks(2).io.allocate ++ memScheduler.io.allocate
  // The first MduCnt dispatch ports feed both exuBlocks(0) and exuBlocks(1) through a
  // DispatchArbiter that is given one fuType acceptance predicate per target.
  for (i <- 0 until exuParameters.MduCnt) {
    val rsIn = VecInit(Seq(exuBlocks(0).io.allocate(i), exuBlocks(1).io.allocate(i)))
    val func1 = (op: MicroOp) => outer.exuBlocks(0).scheduler.canAccept(op.ctrl.fuType)
    val func2 = (op: MicroOp) => outer.exuBlocks(1).scheduler.canAccept(op.ctrl.fuType)
    val arbiterOut = DispatchArbiter(ctrlBlock.io.enqIQ(i), Seq(func1, func2))
    rsIn <> arbiterOut
  }
  // The remaining ports up to AluCnt only feed exuBlocks(0); valid and ready are both
  // gated by whether that scheduler accepts the uop's fuType.
  for (i <- exuParameters.MduCnt until exuParameters.AluCnt) {
    val rsIn = exuBlocks(0).io.allocate(i)
    val dpOut = ctrlBlock.io.enqIQ(i)
    rsIn.valid := dpOut.valid && outer.exuBlocks(0).scheduler.canAccept(dpOut.bits.ctrl.fuType)
    dpOut.ready := rsIn.ready && outer.exuBlocks(0).scheduler.canAccept(dpOut.bits.ctrl.fuType)
  }

  // The last two dispatch ports are sent to two places at once: the last two allocate
  // ports of exuBlocks(1) (std) and the last two of memScheduler (sta).
  val stdAllocate = exuBlocks(1).io.allocate.takeRight(2)
  val staAllocate = memScheduler.io.allocate.takeRight(2)
  stdAllocate.zip(staAllocate).zip(ctrlBlock.io.enqIQ.takeRight(2)).zipWithIndex.foreach{ case (((std, sta), enq), i) =>
    // Each side is valid only when the other side is ready, so both enqueue together.
    std.valid := enq.valid && sta.ready
    sta.valid := enq.valid && std.ready
    std.bits := enq.bits
    sta.bits := enq.bits
    // The std copy uses the uop's source 1 as its source 0.
    std.bits.ctrl.lsrc(0) := enq.bits.ctrl.lsrc(1)
    std.bits.psrc(0) := enq.bits.psrc(1)
    std.bits.srcState(0) := enq.bits.srcState(1)
    std.bits.ctrl.srcType(0) := enq.bits.ctrl.srcType(1)
    enq.ready := sta.ready && std.ready
    XSPerfAccumulate(s"st_rs_not_ready_$i", enq.valid && !enq.ready)
    XSPerfAccumulate(s"sta_rs_not_ready_$i", sta.valid && !sta.ready)
    XSPerfAccumulate(s"std_rs_not_ready_$i", std.valid && !std.ready)
  }
  // exuBlocks(1) reads FP registers through the read ports exported by exuBlocks(2).
  exuBlocks(1).io.scheExtra.fpRfReadIn.get <> exuBlocks(2).io.scheExtra.fpRfReadOut.get

  // Memory scheduler: control, write-back, fast wake-up and extra inputs.
  memScheduler.io.redirect <> ctrlBlock.io.redirect
  memScheduler.io.flush <> ctrlBlock.io.flush
  memBlock.io.issue <> memScheduler.io.issue
  memScheduler.io.writeback <> rfWriteback
  memScheduler.io.fastUopIn <> allFastUop1
  memScheduler.io.extra.jumpPc <> ctrlBlock.io.jumpPc
  memScheduler.io.extra.jalr_target <> ctrlBlock.io.jalr_target
  memScheduler.io.extra.stIssuePtr <> memBlock.io.stIssuePtr
  memScheduler.io.extra.debug_int_rat <> ctrlBlock.io.debug_int_rat
  memScheduler.io.extra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat

  // Every execution block receives the same set of inputs.
  exuBlocks.map(_.io).foreach { exu =>
    exu.redirect <> ctrlBlock.io.redirect
    exu.flush <> ctrlBlock.io.flush
    exu.rfWriteback <> rfWriteback
    exu.fastUopIn <> allFastUop1
    exu.scheExtra.jumpPc <> ctrlBlock.io.jumpPc
    exu.scheExtra.jalr_target <> ctrlBlock.io.jalr_target
    exu.scheExtra.stIssuePtr <> memBlock.io.stIssuePtr
    exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
    exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
  }
  XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
  XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)

  // CSR inputs. perf is tied to DontCare first; the fields listed afterwards are then
  // connected to real sources.
  csrioIn.hartId <> io.hartId
  csrioIn.perf <> DontCare
  csrioIn.perf.retiredInstr <> ctrlBlock.io.roqio.toCSR.perfinfo.retiredInstr
  csrioIn.perf.bpuInfo <> ctrlBlock.io.perfInfo.bpuInfo
  csrioIn.perf.ctrlInfo <> ctrlBlock.io.perfInfo.ctrlInfo
  csrioIn.perf.memInfo <> memBlock.io.memInfo
  csrioIn.perf.frontendInfo <> frontend.io.frontendInfo

  csrioIn.fpu.fflags <> ctrlBlock.io.roqio.toCSR.fflags
  csrioIn.fpu.isIllegal := false.B
  csrioIn.fpu.dirty_fs <> ctrlBlock.io.roqio.toCSR.dirty_fs
  csrioIn.fpu.frm <> exuBlocks(2).io.fuExtra.frm.get
  csrioIn.exception <> ctrlBlock.io.roqio.exception
  csrioIn.isXRet <> ctrlBlock.io.roqio.toCSR.isXRet
  csrioIn.trapTarget <> ctrlBlock.io.roqio.toCSR.trapTarget
  csrioIn.interrupt <> ctrlBlock.io.roqio.toCSR.intrBitSet
  csrioIn.memExceptionVAddr <> memBlock.io.lsqio.exceptionAddr.vaddr
  csrioIn.externalInterrupt <> io.externalInterrupt

  fenceio.sfence <> memBlock.io.sfence
  fenceio.sbuffer <> memBlock.io.fenceToSbuffer

  // Memory block: control, feedback from the memory scheduler, store data, CSR/TLB
  // settings and the ROQ/LSQ interface.
  memBlock.io.redirect <> ctrlBlock.io.redirect
  memBlock.io.flush <> ctrlBlock.io.flush
  memBlock.io.replay <> memScheduler.io.extra.feedback.get.map(_.replay)
  memBlock.io.rsIdx <> memScheduler.io.extra.feedback.get.map(_.rsIdx)
  memBlock.io.isFirstIssue <> memScheduler.io.extra.feedback.get.map(_.isFirstIssue)
  // Store data comes from whichever execution blocks expose stData.
  val stData = exuBlocks.map(_.io.fuExtra.stData.getOrElse(Seq())).reduce(_ ++ _)
  memBlock.io.stData := stData
  memBlock.io.csrCtrl <> csrioIn.customCtrl
  memBlock.io.tlbCsr <> csrioIn.tlb
  memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq
  // The faulting address is looked up with the queue indices of the excepting uop.
  memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.uop.lqIdx
  memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.uop.sqIdx
  memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType)

  // TLB -> PTW path: one repeater for the frontend, and a repeater or a filter
  // (selected by usePTWRepeater) for the memory block.
  val itlbRepeater = Module(new PTWRepeater())
  val dtlbRepeater = if (usePTWRepeater) {
    Module(new PTWRepeater(LoadPipelineWidth + StorePipelineWidth))
  } else {
    Module(new PTWFilter(LoadPipelineWidth + StorePipelineWidth, l2tlbParams.missQueueSize))
  }
  itlbRepeater.io.tlb <> frontend.io.ptw
  dtlbRepeater.io.tlb <> memBlock.io.ptw
  itlbRepeater.io.sfence <> fenceio.sfence
  dtlbRepeater.io.sfence <> fenceio.sfence
  ptw.io.tlb(0) <> itlbRepeater.io.ptw
  ptw.io.tlb(1) <> dtlbRepeater.io.ptw
  ptw.io.sfence <> fenceio.sfence
  ptw.io.csr <> csrioIn.tlb

  // if l2 prefetcher use stream prefetch, it should be placed in XSCore
  assert(l2PrefetcherParameters._type == "bop")
  io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable

  // One ResetGen per group of blocks, each with a different first argument.
  // NOTE(review): the meaning of that argument and of the second one is defined by
  // ResetGen, which is not visible here - confirm before changing the numbers.
  val l1plus_reset_gen = Module(new ResetGen(1, !debugOpts.FPGAPlatform))
  l1pluscache.reset := l1plus_reset_gen.io.out

  val ptw_reset_gen = Module(new ResetGen(2, !debugOpts.FPGAPlatform))
  ptw.reset := ptw_reset_gen.io.out
  itlbRepeater.reset := ptw_reset_gen.io.out
  dtlbRepeater.reset := ptw_reset_gen.io.out

  val memBlock_reset_gen = Module(new ResetGen(3, !debugOpts.FPGAPlatform))
  memBlock.reset := memBlock_reset_gen.io.out

  val exuBlock_reset_gen = Module(new ResetGen(4, !debugOpts.FPGAPlatform))
  exuBlocks.foreach(_.reset := exuBlock_reset_gen.io.out)

  val ctrlBlock_reset_gen = Module(new ResetGen(6, !debugOpts.FPGAPlatform))
  ctrlBlock.reset := ctrlBlock_reset_gen.io.out

  val frontend_reset_gen = Module(new ResetGen(7, !debugOpts.FPGAPlatform))
  frontend.reset := frontend_reset_gen.io.out
}
386