/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan

import chisel3._
import chisel3.util._
import xiangshan.backend._
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.backend.exu.{ExuConfig, WbArbiter}
import xiangshan.frontend._
import xiangshan.cache.mmu._
import chipsalliance.rocketchip.config
import chipsalliance.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tile.HasFPUParameters
import system.{HasSoCParameter, L1CacheErrorInfo, SoCParamsKey}
import utils._

/** Common base class for XiangShan hardware modules.
  *
  * Mixes the core parameter, exception-number and FPU parameter traits into a
  * `MultiIOModule`. Concrete modules must provide an `io` record.
  */
abstract class XSModule(implicit val p: Parameters) extends MultiIOModule
  with HasXSParameter
  with HasExceptionNO
  with HasFPUParameters {
  def io: Record
}

/** Placeholder mix-in for modules whose logic has not been written yet.
  *
  * Overrides `IO` so that every port created through it is tied to `DontCare`,
  * and prints a reminder at elaboration time.
  * Remove this trait once the module logic is implemented.
  */
trait NeedImpl {
  this: RawModule =>
  override protected def IO[T <: Data](iodef: T): T = {
    println(s"[Warn]: (${this.name}) please remove 'NeedImpl' after implement this module")
    val io = chisel3.experimental.IO(iodef)
    io <> DontCare
    io
  }
}

/** Common base class for XiangShan bundles; gives access to the core parameters. */
abstract class XSBundle(implicit val p: Parameters) extends Bundle
  with HasXSParameter

/** Environment switches with their default values.
  *
  * NOTE(review): the class name is misspelled ("Enviroment") but it is part of
  * the public interface, so it is kept as is. The precise effect of each flag is
  * defined by its users, which are outside this file.
  */
case class EnviromentParameters
(
  FPGAPlatform: Boolean = true,
  EnableDebug: Boolean = false,
  EnablePerfDebug: Boolean = true,
  DualCore: Boolean = false
)

/** Diplomatic (LazyModule) part of the core.
  *
  * Instantiates the frontend, the PTW wrapper, the integer and floating-point
  * writeback arbiters, one `ExuBlock` per scheduler group except the last, and a
  * `Scheduler` plus `MemBlock` for the last (load/store) group.
  */
abstract class XSCoreBase()(implicit p: config.Parameters) extends LazyModule
  with HasXSParameter with HasExuWbMappingHelper
{
  // outer facing nodes
  val frontend = LazyModule(new Frontend())
  val ptw = LazyModule(new PTWWrapper())

  // Writeback arbiter for all execution units that write the integer register file.
  val intConfigs = exuConfigs.filter(_.writeIntRf)
  val intArbiter = LazyModule(new WbArbiter(intConfigs, NRIntWritePorts, isFp = false))
  val intWbPorts = intArbiter.allConnections.map(c => c.map(intConfigs(_)))
  val numIntWbPorts = intWbPorts.length

  // Writeback arbiter for all execution units that write the floating-point register file.
  val fpConfigs = exuConfigs.filter(_.writeFpRf)
  val fpArbiter = LazyModule(new WbArbiter(fpConfigs, NRFpWritePorts, isFp = true))
  val fpWbPorts = fpArbiter.allConnections.map(c => c.map(fpConfigs(_)))
  val numFpWbPorts = fpWbPorts.length

  // TODO: better RS organization
  // generate rs according to number of function units
  // The port tables and the hard-coded indices used below rely on these constraints.
  require(exuParameters.JmpCnt == 1)
  require(exuParameters.MduCnt <= exuParameters.AluCnt && exuParameters.MduCnt > 0)
  require(exuParameters.FmiscCnt <= exuParameters.FmacCnt && exuParameters.FmiscCnt > 0)
  require(exuParameters.LduCnt == 2 && exuParameters.StuCnt == 2)

  // one RS every 2 MDUs
  // One inner Seq per scheduler group, in the order: int (ALU), int (MDU/JumpCSR/Std), fp, load/store.
  val schedulePorts = Seq(
    // exuCfg, numDeq, intFastWakeupTarget, fpFastWakeupTarget
    Seq(
      (AluExeUnitCfg, exuParameters.AluCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg, JumpCSRExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq())
    ),
    Seq(
      (MulDivExeUnitCfg, exuParameters.MduCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg, JumpCSRExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq()),
      (JumpCSRExeUnitCfg, 1, Seq(), Seq()),
      (StdExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    ),
    Seq(
      (FmacExeUnitCfg, exuParameters.FmacCnt, Seq(), Seq(FmacExeUnitCfg, FmiscExeUnitCfg)),
      (FmiscExeUnitCfg, exuParameters.FmiscCnt, Seq(), Seq())
    ),
    Seq(
      (LdExeUnitCfg, exuParameters.LduCnt, Seq(AluExeUnitCfg, LdExeUnitCfg), Seq()),
      (StaExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    )
  )

  // should do outer fast wakeup ports here
  // For every scheduler group and every entry in it, compute the sorted list of
  // fast-wakeup indices produced by getFastWakeupIndex from the int/fp sources
  // that list this entry's config as a fast wakeup target.
  val otherFastPorts = schedulePorts.zipWithIndex.map { case (sche, i) =>
    // all entries that belong to the other scheduler groups
    val otherCfg = schedulePorts.zipWithIndex.filter(_._2 != i).map(_._1).reduce(_ ++ _)
    val outerPorts = sche.map(cfg => {
      // exe units from this scheduler need fastUops from exeunits
      val outerWakeupInSche = sche.filter(_._1.wakeupFromExu)
      val intraIntScheOuter = outerWakeupInSche.filter(_._3.contains(cfg._1)).map(_._1)
      val intraFpScheOuter = outerWakeupInSche.filter(_._4.contains(cfg._1)).map(_._1)
      // exe units from other schedulers need fastUop from outside
      val otherIntSource = otherCfg.filter(_._3.contains(cfg._1)).map(_._1)
      val otherFpSource = otherCfg.filter(_._4.contains(cfg._1)).map(_._1)
      val intSource = findInWbPorts(intWbPorts, intraIntScheOuter ++ otherIntSource)
      val fpSource = findInWbPorts(fpWbPorts, intraFpScheOuter ++ otherFpSource)
      getFastWakeupIndex(cfg._1, intSource, fpSource, numIntWbPorts).sorted
    })
    println(s"inter-scheduler wakeup sources for $i: $outerPorts")
    outerPorts
  }

  // allow mdu and fmisc to have 2*numDeq enqueue ports
  // Each dispatch port is described by a Seq of (a, b) pairs.
  // NOTE(review): presumably a = index of the entry inside the scheduler group's
  // config list and b = enqueue port index of that entry -- confirm against Scheduler.
  val intDpPorts = (0 until exuParameters.AluCnt).map(i => Seq((0, i)))
  val int1DpPorts = (0 until exuParameters.MduCnt).map(i => {
    if (i < exuParameters.JmpCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  }) ++ (0 until exuParameters.StuCnt).map(i => Seq((2, i)))
  val fpDpPorts = (0 until exuParameters.FmacCnt).map(i => {
    if (i < 2*exuParameters.FmiscCnt) Seq((0, i), (1, i))
    // Ports beyond the first 2*FmiscCnt target only entry 0 (Fmac in schedulePorts),
    // mirroring int1DpPorts. Targeting entry 1 here would give Fmisc more than the
    // 2*numDeq enqueue ports stated above and leave Fmac without these ports.
    else Seq((0, i))
  })
  val lsDpPorts = Seq(
    Seq((0, 0)),
    Seq((0, 1)),
    Seq((1, 0)),
    Seq((1, 1))
  )
  val dispatchPorts = Seq(intDpPorts, int1DpPorts, fpDpPorts, lsDpPorts)

  // One value per scheduler group, passed as the last argument of ExuBlock / Scheduler.
  val outFpRfReadPorts = Seq(0, 0, 2, 0)
  // Every group except the last one becomes an ExuBlock. `.init.map` yields the same
  // elements in the same order as the former `.reverse.drop(1).reverseMap`, without
  // relying on `reverseMap`, which is deprecated in Scala 2.13.
  val exuBlocks = schedulePorts.zip(dispatchPorts).zip(otherFastPorts).zip(outFpRfReadPorts).init.map {
    case (((sche, disp), other), ofp) =>
      LazyModule(new ExuBlock(sche, disp, intWbPorts, fpWbPorts, other, ofp))
  }

  // The last group (load / store-address) gets a stand-alone scheduler that feeds MemBlock.
  val memScheduler = LazyModule(new Scheduler(schedulePorts.last, dispatchPorts.last, intWbPorts, fpWbPorts, otherFastPorts.last, outFpRfReadPorts.last))
  // MemBlock sees a parameter view whose IssQueSize is the largest entry of memScheduler.memRsEntries.
  val memBlock = LazyModule(new MemBlock()(p.alter((site, here, up) => {
    case XSCoreParamsKey => up(XSCoreParamsKey).copy(
      IssQueSize = memScheduler.memRsEntries.max
    )
  })))
}

/** Concrete core: [[XSCoreBase]] plus device-tree support, implemented by [[XSCoreImp]]. */
class XSCore()(implicit p: config.Parameters) extends XSCoreBase
  with HasXSDts
{
  lazy val module = new XSCoreImp(this)
}

/** Module implementation of the core.
  *
  * Wires together the control block, frontend, execution blocks, memory
  * scheduler, memory block and PTW created by `outer`.
  *
  * Statement order matters here: several signals receive a default connection
  * first and are overridden by later connections.
  *
  * @param outer the LazyModule holding the diplomatic sub-blocks
  */
class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasSoCParameter
  with HasExeBlockHelper {
  val io = IO(new Bundle {
    val hartId = Input(UInt(64.W))
    val externalInterrupt = new ExternalInterruptIO
    val l2_pf_enable = Output(Bool())
    val l1plus_error, icache_error, dcache_error = Output(new L1CacheErrorInfo)
  })

  println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}")
  AddressSpace.checkMemmap()
  AddressSpace.printMemmap()

  val ctrlBlock = Module(new CtrlBlock)

  val frontend = outer.frontend.module
  val memBlock = outer.memBlock.module
  val ptw = outer.ptw.module
  val exuBlocks = outer.exuBlocks.map(_.module)
  val memScheduler = outer.memScheduler.module

  // ---------------------------------------------------------------------------
  // Writeback: collect every function-unit writeback port and arbitrate them
  // onto the integer and floating-point register-file write ports.
  // ---------------------------------------------------------------------------
  val allWriteback = exuBlocks.map(_.io.fuWriteback).fold(Seq())(_ ++ _) ++ memBlock.io.writeback

  val intWriteback = allWriteback.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  // allWriteback is zipped with exuConfigs by position, so the lengths must agree.
  require(exuConfigs.length == allWriteback.length)
  // set default value for ready
  exuBlocks.foreach(_.io.fuWriteback.foreach(_.ready := true.B))
  memBlock.io.writeback.foreach(_.ready := true.B)

  val intArbiter = outer.intArbiter.module
  intArbiter.io.in.zip(intWriteback).foreach { case (arb, wb) =>
    // only results that do not write the fp register file go to the int arbiter
    arb.valid := wb.valid && !wb.bits.uop.ctrl.fpWen
    arb.bits := wb.bits
    // override the default ready only while this arbiter input is actually in use
    when (arb.valid) {
      wb.ready := arb.ready
    }
  }

  val fpArbiter = outer.fpArbiter.module
  val fpWriteback = allWriteback.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  fpArbiter.io.in.zip(fpWriteback).foreach{ case (arb, wb) =>
    // only results that write the fp register file go to the fp arbiter
    arb.valid := wb.valid && wb.bits.uop.ctrl.fpWen
    arb.bits := wb.bits
    // override the default ready only while this arbiter input is actually in use
    when (arb.valid) {
      wb.ready := arb.ready
    }
  }

  // integer arbiter outputs first, then floating-point arbiter outputs
  val rfWriteback = VecInit(intArbiter.io.out ++ fpArbiter.io.out)

  // ---------------------------------------------------------------------------
  // Cache error reporting
  // ---------------------------------------------------------------------------
  io.l1plus_error <> DontCare
  io.icache_error <> frontend.io.error
  io.dcache_error <> memBlock.io.error

  // ---------------------------------------------------------------------------
  // CSR / fence: exactly one execution block contains the JumpCSR unit.
  // ---------------------------------------------------------------------------
  require(exuBlocks.count(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)) == 1)
  val csrFenceMod = exuBlocks.filter(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)).head
  val csrioIn = csrFenceMod.io.fuExtra.csrio.get
  val fenceio = csrFenceMod.io.fuExtra.fenceio.get

  frontend.io.backend <> ctrlBlock.io.frontend
  frontend.io.sfence <> fenceio.sfence
  frontend.io.tlbCsr <> csrioIn.tlb
  frontend.io.csrCtrl <> csrioIn.customCtrl
  frontend.io.fencei := fenceio.fencei

  // ---------------------------------------------------------------------------
  // Control block
  // ---------------------------------------------------------------------------
  ctrlBlock.io.csrCtrl <> csrioIn.customCtrl
  // blocks that contain at least one unit with hasRedirect, taken in reverse block order
  val redirectBlocks = exuBlocks.reverse.filter(_.fuConfigs.map(_._1).map(_.hasRedirect).reduce(_ || _))
  ctrlBlock.io.exuRedirect <> redirectBlocks.map(_.io.fuExtra.exuRedirect).fold(Seq())(_ ++ _)
  ctrlBlock.io.stIn <> memBlock.io.stIn
  ctrlBlock.io.stOut <> memBlock.io.stOut
  ctrlBlock.io.memoryViolation <> memBlock.io.memoryViolation
  ctrlBlock.io.enqLsq <> memBlock.io.enqLsq
  ctrlBlock.io.writeback <> rfWriteback

  // ---------------------------------------------------------------------------
  // Fast wakeup: for every arbiter output use the fast uop of the first source
  // connected to it (`c.head`).
  // ---------------------------------------------------------------------------
  val allFastUop = exuBlocks.map(_.io.fastUopOut).fold(Seq())(_ ++ _) ++ memBlock.io.otherFastWakeup
  val intFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  val fpFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  val intFastUop1 = outer.intArbiter.allConnections.map(c => intFastUop(c.head))
  val fpFastUop1 = outer.fpArbiter.allConnections.map(c => fpFastUop(c.head))
  val allFastUop1 = intFastUop1 ++ fpFastUop1

  // ---------------------------------------------------------------------------
  // Dispatch -> reservation stations.
  // Default connection first; the loops below re-drive the integer ports and
  // the last two (store) ports.
  // ---------------------------------------------------------------------------
  ctrlBlock.io.enqIQ <> exuBlocks(0).io.allocate ++ exuBlocks(2).io.allocate ++ memScheduler.io.allocate
  // The first MduCnt ports are shared between exuBlocks(0) and exuBlocks(1):
  // DispatchArbiter is given one acceptance predicate per block, based on the uop's fuType.
  for (i <- 0 until exuParameters.MduCnt) {
    val rsIn = VecInit(Seq(exuBlocks(0).io.allocate(i), exuBlocks(1).io.allocate(i)))
    val func1 = (op: MicroOp) => outer.exuBlocks(0).scheduler.canAccept(op.ctrl.fuType)
    val func2 = (op: MicroOp) => outer.exuBlocks(1).scheduler.canAccept(op.ctrl.fuType)
    val arbiterOut = DispatchArbiter(ctrlBlock.io.enqIQ(i), Seq(func1, func2))
    rsIn <> arbiterOut
  }
  // The remaining integer ports go to exuBlocks(0) only, gated by its canAccept.
  for (i <- exuParameters.MduCnt until exuParameters.AluCnt) {
    val rsIn = exuBlocks(0).io.allocate(i)
    val dpOut = ctrlBlock.io.enqIQ(i)
    rsIn.valid := dpOut.valid && outer.exuBlocks(0).scheduler.canAccept(dpOut.bits.ctrl.fuType)
    dpOut.ready := rsIn.ready && outer.exuBlocks(0).scheduler.canAccept(dpOut.bits.ctrl.fuType)
  }

  // Each of the last two dispatch ports is delivered to both a store-data port
  // (last two allocate ports of exuBlocks(1)) and a store-address port (last two
  // allocate ports of memScheduler). Each side's valid is gated by the other
  // side's ready, so an enqueue is accepted only when both are ready.
  val stdAllocate = exuBlocks(1).io.allocate.takeRight(2)
  val staAllocate = memScheduler.io.allocate.takeRight(2)
  stdAllocate.zip(staAllocate).zip(ctrlBlock.io.enqIQ.takeRight(2)).zipWithIndex.foreach{ case (((std, sta), enq), i) =>
    std.valid := enq.valid && sta.ready
    sta.valid := enq.valid && std.ready
    std.bits := enq.bits
    sta.bits := enq.bits
    // the store-data copy uses source operand 1 of the incoming uop as its operand 0
    std.bits.ctrl.lsrc(0) := enq.bits.ctrl.lsrc(1)
    std.bits.psrc(0) := enq.bits.psrc(1)
    std.bits.srcState(0) := enq.bits.srcState(1)
    std.bits.ctrl.srcType(0) := enq.bits.ctrl.srcType(1)
    enq.ready := sta.ready && std.ready
    XSPerfAccumulate(s"st_rs_not_ready_$i", enq.valid && !enq.ready)
    XSPerfAccumulate(s"sta_rs_not_ready_$i", sta.valid && !sta.ready)
    XSPerfAccumulate(s"std_rs_not_ready_$i", std.valid && !std.ready)
  }
  // exuBlocks(1) obtains its fp register-file read data from exuBlocks(2)
  exuBlocks(1).io.scheExtra.fpRfReadIn.get <> exuBlocks(2).io.scheExtra.fpRfReadOut.get

  // ---------------------------------------------------------------------------
  // Memory scheduler
  // ---------------------------------------------------------------------------
  memScheduler.io.redirect <> ctrlBlock.io.redirect
  memScheduler.io.flush <> ctrlBlock.io.flush
  memBlock.io.issue <> memScheduler.io.issue
  memScheduler.io.writeback <> rfWriteback
  memScheduler.io.fastUopIn <> allFastUop1
  memScheduler.io.extra.jumpPc <> ctrlBlock.io.jumpPc
  memScheduler.io.extra.jalr_target <> ctrlBlock.io.jalr_target
  memScheduler.io.extra.stIssuePtr <> memBlock.io.stIssuePtr
  memScheduler.io.extra.loadFastMatch.get <> memBlock.io.loadFastMatch
  memScheduler.io.extra.debug_int_rat <> ctrlBlock.io.debug_int_rat
  memScheduler.io.extra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat

  // ---------------------------------------------------------------------------
  // Signals shared by every execution block
  // ---------------------------------------------------------------------------
  exuBlocks.map(_.io).foreach { exu =>
    exu.redirect <> ctrlBlock.io.redirect
    exu.flush <> ctrlBlock.io.flush
    exu.rfWriteback <> rfWriteback
    exu.fastUopIn <> allFastUop1
    exu.scheExtra.jumpPc <> ctrlBlock.io.jumpPc
    exu.scheExtra.jalr_target <> ctrlBlock.io.jalr_target
    exu.scheExtra.stIssuePtr <> memBlock.io.stIssuePtr
    exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
    exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
  }
  XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
  XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)

  // ---------------------------------------------------------------------------
  // CSR inputs
  // ---------------------------------------------------------------------------
  csrioIn.hartId <> io.hartId
  // default for all perf fields; the fields connected below override it
  csrioIn.perf <> DontCare
  csrioIn.perf.retiredInstr <> ctrlBlock.io.roqio.toCSR.perfinfo.retiredInstr
  csrioIn.perf.ctrlInfo <> ctrlBlock.io.perfInfo.ctrlInfo
  csrioIn.perf.memInfo <> memBlock.io.memInfo
  csrioIn.perf.frontendInfo <> frontend.io.frontendInfo

  csrioIn.fpu.fflags <> ctrlBlock.io.roqio.toCSR.fflags
  csrioIn.fpu.isIllegal := false.B
  csrioIn.fpu.dirty_fs <> ctrlBlock.io.roqio.toCSR.dirty_fs
  csrioIn.fpu.frm <> exuBlocks(2).io.fuExtra.frm.get
  csrioIn.exception <> ctrlBlock.io.roqio.exception
  csrioIn.isXRet <> ctrlBlock.io.roqio.toCSR.isXRet
  csrioIn.trapTarget <> ctrlBlock.io.roqio.toCSR.trapTarget
  csrioIn.interrupt <> ctrlBlock.io.roqio.toCSR.intrBitSet
  csrioIn.memExceptionVAddr <> memBlock.io.lsqio.exceptionAddr.vaddr
  csrioIn.externalInterrupt <> io.externalInterrupt

  fenceio.sfence <> memBlock.io.sfence
  fenceio.sbuffer <> memBlock.io.fenceToSbuffer

  // ---------------------------------------------------------------------------
  // Memory block
  // ---------------------------------------------------------------------------
  memBlock.io.redirect <> ctrlBlock.io.redirect
  memBlock.io.flush <> ctrlBlock.io.flush
  memBlock.io.replay <> memScheduler.io.extra.feedback.get.map(_.replay)
  memBlock.io.rsIdx <> memScheduler.io.extra.feedback.get.map(_.rsIdx)
  memBlock.io.isFirstIssue <> memScheduler.io.extra.feedback.get.map(_.isFirstIssue)
  // store data from every execution block that provides it (blocks without it contribute nothing)
  val stData = exuBlocks.map(_.io.fuExtra.stData.getOrElse(Seq())).reduce(_ ++ _)
  memBlock.io.stData := stData
  memBlock.io.csrCtrl <> csrioIn.customCtrl
  memBlock.io.tlbCsr <> csrioIn.tlb
  memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq
  memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.uop.lqIdx
  memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.uop.sqIdx
  memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType)

  // ---------------------------------------------------------------------------
  // Page-table walker: the frontend and the memory block reach the PTW through
  // a PTWRepeater and a PTWFilter respectively (PTW port 0 and port 1).
  // ---------------------------------------------------------------------------
  val itlbRepeater = Module(new PTWRepeater(2))
  val dtlbRepeater = Module(new PTWFilter(LoadPipelineWidth + StorePipelineWidth, l2tlbParams.missQueueSize-1))
  itlbRepeater.io.tlb <> frontend.io.ptw
  dtlbRepeater.io.tlb <> memBlock.io.ptw
  itlbRepeater.io.sfence <> fenceio.sfence
  dtlbRepeater.io.sfence <> fenceio.sfence
  ptw.io.tlb(0) <> itlbRepeater.io.ptw
  ptw.io.tlb(1) <> dtlbRepeater.io.ptw
  ptw.io.sfence <> fenceio.sfence
  ptw.io.csr <> csrioIn.tlb

  // if l2 prefetcher use stream prefetch, it should be placed in XSCore
  io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable

  // ---------------------------------------------------------------------------
  // Resets: each group of blocks is reset by its own ResetGen instance.
  // NOTE(review): the first ResetGen argument (2, 3, 4, 6, 7) presumably sets
  // the reset delay and thereby the release order -- confirm against ResetGen.
  // ---------------------------------------------------------------------------
  val ptw_reset_gen = Module(new ResetGen(2, !debugOpts.FPGAPlatform))
  ptw.reset := ptw_reset_gen.io.out
  itlbRepeater.reset := ptw_reset_gen.io.out
  dtlbRepeater.reset := ptw_reset_gen.io.out

  val memBlock_reset_gen = Module(new ResetGen(3, !debugOpts.FPGAPlatform))
  memBlock.reset := memBlock_reset_gen.io.out

  val exuBlock_reset_gen = Module(new ResetGen(4, !debugOpts.FPGAPlatform))
  exuBlocks.foreach(_.reset := exuBlock_reset_gen.io.out)

  val ctrlBlock_reset_gen = Module(new ResetGen(6, !debugOpts.FPGAPlatform))
  ctrlBlock.reset := ctrlBlock_reset_gen.io.out

  val frontend_reset_gen = Module(new ResetGen(7, !debugOpts.FPGAPlatform))
  frontend.reset := frontend_reset_gen.io.out
}