/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend.fu

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.SignExt
import xiangshan.backend.fu.util.CSA3_2

/** A Radix-4 SRT Integer Divider (data path and control FSM).
  *
  * 2 ~ (5 + (len+3)/2) cycles are needed for each division.
  *
  * Ports:
  *  - `src(0)` / `src(1)`: dividend / divisor.
  *  - `valid` / `in_ready`: input handshake; a request is accepted only in `s_idle`.
  *  - `sign`: when set, operands are interpreted as signed (their MSB is the sign).
  *  - `kill_w`: suppresses the start of the request being accepted this cycle.
  *  - `kill_r`: aborts the division in flight (forces the FSM back to `s_idle`).
  *  - `isHi`: selects the remainder instead of the quotient as the result.
  *  - `isW`: the low 32 bits of the result are sign-extended to `len` bits.
  *  - `out_valid` / `out_ready` / `out_data`: output handshake and result.
  *
  * NOTE(review): `isHi` and `isW` are read combinationally when the result is
  * formed, so the caller must hold them stable until the result is taken.
  *
  * @param len operand width in bits
  */
class SRT4DividerDataModule(len: Int) extends Module {
  val io = IO(new Bundle() {
    val src = Vec(2, Input(UInt(len.W)))
    val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
    val in_ready = Output(Bool())
    val out_valid = Output(Bool())
    val out_data = Output(UInt(len.W))
    val out_ready = Input(Bool())
  })

  // Short local aliases for the IO fields.
  val (a, b, sign, valid, kill_w, kill_r, isHi, isW) =
    (io.src(0), io.src(1), io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
  val in_fire = valid && io.in_ready
  val out_fire = io.out_ready && io.out_valid

  // s_pad_* are never entered; they only pad the enumeration so that s_finish
  // is encoded as 8 and can be recognised from bit 3 of `state` alone.
  val s_idle :: s_lzd :: s_normlize :: s_recurrence :: s_recovery_1 :: s_recovery_2 :: s_pad_1 :: s_pad_2 :: s_finish :: Nil = Enum(9)
  require(s_finish.litValue() == 8)

  val state = RegInit(s_idle)
  val finished = state(3).asBool // state === s_finish

  // Recurrence iteration counter: loaded in s_normlize, decremented in s_recurrence.
  val cnt_next = Wire(UInt(log2Up((len + 3) / 2).W))
  val cnt = RegEnable(cnt_next, state === s_normlize || state === s_recurrence)
  // The recurrence is complete once the next counter value reaches zero.
  val rec_enough = cnt_next === 0.U
  // Note: newReq does not include !kill_w, so the operand registers below are
  // loaded even when the request is killed; only the FSM transition is gated.
  val newReq = in_fire

  /** Returns (isNegative, magnitude) of `a`.
    *
    * `a` is treated as negative only when `sign` is set and its MSB is 1.
    */
  def abs(a: UInt, sign: Bool): (Bool, UInt) = {
    val s = a(len - 1) && sign
    (s, Mux(s, -a, a))
  }

  val (aSign, aVal) = abs(a, sign)
  val (bSign, bVal) = abs(b, sign)
  // Sign of the remainder follows the dividend; sign of the quotient is the XOR.
  val aSignReg = RegEnable(aSign, newReq)
  val qSignReg = RegEnable(aSign ^ bSign, newReq)
  val divZero = b === 0.U
  val divZeroReg = RegEnable(divZero, newReq)

  switch(state) {
    is(s_idle) {
      when(in_fire && !kill_w) {
        // Division by zero skips the whole computation.
        state := Mux(divZero, s_finish, s_lzd)
      }
    }
    is(s_lzd) { // leading zero detection
      state := s_normlize
    }
    is(s_normlize) { // shift a/b
      state := s_recurrence
    }
    is(s_recurrence) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
      when(rec_enough) {
        state := s_recovery_1
      }
    }
    is(s_recovery_1) { // if rem < 0, rem = rem + d
      state := s_recovery_2
    }
    is(s_recovery_2) { // recovery shift
      state := s_finish
    }
    is(s_finish) {
      when(out_fire) {
        state := s_idle
      }
    }
  }
  // Placed after the switch so that, by last-connect semantics, a kill
  // overrides any state transition chosen above.
  when(kill_r) {
    state := s_idle
  }

  /** Calculate abs(a)/abs(b) by recurrence
    *
    * ws, wc: partial remainder in carry-save form,
    * in recurrence steps, ws/wc = 4ws[j]/4wc[j];
    * in recovery step, ws/wc = ws[j]/wc[j];
    * in final step, ws = abs(a)/abs(b).
    * NOTE(review): the "final step" statement above is inherited from the
    * original comment; the visible code does not write ws after s_recurrence.
    *
    * d: normalized divisor (1/2 <= d < 1)
    *
    * wLen = 3 integer bits + (len+1) frac bits
    */
  def wLen = 3 + len + 1

  val ws, wc = Reg(UInt(wLen.W))
  val ws_next, wc_next = Wire(UInt(wLen.W))
  val d = Reg(UInt(wLen.W))

  // Leading-zero counts of |a| (held in ws) and |b| (held in d), captured in s_lzd.
  // Reversing the bits makes PriorityEncoder return the distance of the first
  // set bit from the MSB.
  val aLeadingZeros = RegEnable(
    next = PriorityEncoder(ws(len - 1, 0).asBools().reverse),
    enable = state === s_lzd
  )
  val bLeadingZeros = RegEnable(
    next = PriorityEncoder(d(len - 1, 0).asBools().reverse),
    enable = state === s_lzd
  )
  // Zero-extend by one bit before the signed subtraction so neither count is
  // misread as negative.
  val diff = Cat(0.U(1.W), bLeadingZeros).asSInt() - Cat(0.U(1.W), aLeadingZeros).asSInt()
  val isNegDiff = diff(diff.getWidth - 1)
  // max(diff, 0): drives both the iteration count and the dividend alignment.
  val quotientBits = Mux(isNegDiff, 0.U, diff.asUInt())
  val qBitsIsOdd = quotientBits(0)
  // Shift amount applied to the remainder in s_recovery_2.
  val recoveryShift = RegEnable(len.U - bLeadingZeros, state === s_normlize)
  val a_shifted, b_shifted = Wire(UInt(len.W))
  // When the dividend has more leading zeros than the divisor, it is shifted by
  // the divisor's count instead of its own.
  a_shifted := Mux(isNegDiff,
    ws(len - 1, 0) << bLeadingZeros,
    ws(len - 1, 0) << aLeadingZeros
  )
  b_shifted := d(len - 1, 0) << bLeadingZeros

  // Collapse the carry-save pair into a single value.
  val rem_temp = ws + wc
  // A set MSB marks a negative remainder, which is corrected by adding d back.
  val rem_fixed = RegEnable(Mux(rem_temp(wLen - 1), rem_temp + d, rem_temp), state === s_recovery_1)
  val rem_abs = RegEnable((rem_fixed << recoveryShift) (2 * len, len + 1), state === s_recovery_2)

  when(newReq) {
    // On division by zero the raw dividend is kept in ws, because the remainder
    // result is then read straight from ws.
    ws := Cat(0.U(4.W), Mux(divZero, a, aVal))
    wc := 0.U
    d := Cat(0.U(4.W), bVal)
  }.elsewhen(state === s_normlize) {
    d := Cat(0.U(3.W), b_shifted, 0.U(1.W))
    // The dividend alignment depends on the parity of quotientBits.
    ws := Mux(qBitsIsOdd, a_shifted, a_shifted << 1)
  }.elsewhen(state === s_recurrence) {
    // Pre-multiply by 4 for the next iteration, except after the last one.
    ws := Mux(rec_enough, ws_next, ws_next << 2)
    wc := Mux(rec_enough, wc_next, wc_next << 2)
  }

  // Load the iteration count in s_normlize, otherwise count down.
  cnt_next := Mux(state === s_normlize, (quotientBits + 3.U) >> 1, cnt - 1.U)

  /** Quotient selection
    *
    * The quotient selection table uses a truncated 7-bit remainder
    * and a 3-bit divisor.
    *
    * Naming: sel_d / sel_dx2 select quotient digits +1 / +2 (d or 2d is
    * subtracted); sel_neg_d / sel_neg_dx2 select digits -1 / -2.
    */
  val sel_0 :: sel_d :: sel_dx2 :: sel_neg_d :: sel_neg_dx2 :: Nil = Enum(5)
  val dx2, neg_d, neg_dx2 = Wire(UInt(wLen.W))
  dx2 := d << 1
  neg_d := (~d).asUInt() // add '1' in carry-save adder later
  neg_dx2 := neg_d << 1

  val q_sel = Wire(UInt(3.W))
  // Two's-complement correction for the inverted divisor: +1 for -d, and +2 for
  // -2d (because neg_dx2 is ~d shifted left by one).
  val wc_adj = MuxLookup(q_sel, 0.U(2.W), Seq(
    sel_d -> 1.U(2.W),
    sel_dx2 -> 2.U(2.W)
  ))

  // Top 7 bits of the partial remainder, read as a signed value.
  val w_truncated = (ws(wLen - 1, wLen - 1 - 6) + wc(wLen - 1, wLen - 1 - 6)).asSInt()
  // Drops the MSB of the shifted divisor and keeps the next 3 bits
  // (presumably the MSB is always 1 after normalization — TODO confirm).
  val d_truncated = b_shifted.tail(1).head(3)

  // One row per d_truncated value. Each row holds four thresholds in descending
  // order, separating the digits +2 | +1 | 0 | -1 | -2.
  val qSelTable = Array(
    Array(12, 4, -4, -13),
    Array(14, 4, -6, -15),
    Array(15, 4, -6, -16),
    Array(16, 4, -6, -18),
    Array(18, 6, -8, -20),
    Array(20, 6, -8, -20),
    Array(20, 8, -8, -22),
    Array(24, 8, -8, -24)
  )

  // The row is latched during s_normlize: b_shifted is derived from d, and d is
  // overwritten with the normalized divisor at the end of that cycle.
  val table = RegEnable(
    VecInit(qSelTable.map(row =>
      VecInit(row.map(k => k.S(7.W)))
    ))(d_truncated),
    state === s_normlize
  )

  // The first threshold met (highest first) picks the digit; below all four
  // thresholds the digit is -2.
  q_sel := MuxCase(sel_neg_dx2,
    table.zip(Seq(sel_dx2, sel_d, sel_0, sel_neg_d)).map {
      case (k, s) => (w_truncated >= k) -> s
    }
  )

  /** Calculate (ws[j+1],wc[j+1]) by a [3-2]carry-save adder
    *
    * (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
    */
  val csa = Module(new CSA3_2(wLen))
  csa.io.in(0) := ws
  // The low two bits of wc are replaced by the two's-complement correction
  // (presumably they are always zero here, since wc is shifted left by 2 on
  // every recurrence step — TODO confirm).
  csa.io.in(1) := Cat(wc(wLen - 1, 2), wc_adj)
  csa.io.in(2) := MuxLookup(q_sel, 0.U, Seq(
    sel_d -> neg_d,
    sel_dx2 -> neg_dx2,
    sel_neg_d -> d,
    sel_neg_dx2 -> dx2
  ))
  ws_next := csa.io.out(0)
  // NOTE(review): out(1) looks like the carry vector, hence the left shift by
  // one — confirm against CSA3_2.
  wc_next := csa.io.out(1) << 1

  // On the fly quotient conversion
  // Each step appends a 2-bit digit to either q or qm, so a negative digit never
  // needs a borrow to ripple through the quotient.
  // NOTE(review): qm presumably tracks q - 1, as in standard on-the-fly
  // conversion — confirm.
  val q, qm = Reg(UInt(len.W))
  when(newReq) {
    q := 0.U
    qm := 0.U
  }.elsewhen(state === s_recurrence) {
    // digit -> (register to shift left by 2, 2-bit value to append) for q
    val qMap = Seq(
      sel_0 -> (q, 0),
      sel_d -> (q, 1),
      sel_dx2 -> (q, 2),
      sel_neg_d -> (qm, 3),
      sel_neg_dx2 -> (qm, 2)
    )
    q := MuxLookup(q_sel, 0.U,
      qMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
    )
    // digit -> (register to shift left by 2, 2-bit value to append) for qm
    val qmMap = Seq(
      sel_0 -> (qm, 3),
      sel_d -> (q, 0),
      sel_dx2 -> (q, 1),
      sel_neg_d -> (qm, 2),
      sel_neg_dx2 -> (qm, 1)
    )
    qm := MuxLookup(q_sel, 0.U,
      qmMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
    )
  }.elsewhen(state === s_recovery_1) {
    // A negative final remainder selects qm as the quotient.
    q := Mux(rem_temp(wLen - 1), qm, q)
  }


  // Re-apply the signs recorded when the request was accepted.
  val remainder = Mux(aSignReg, -rem_abs(len - 1, 0), rem_abs(len - 1, 0))
  val quotient = Mux(qSignReg, -q, q)

  // Division by zero: the remainder is the value held in ws (the raw dividend)
  // and the quotient is all ones.
  val res = Mux(isHi,
    Mux(divZeroReg, ws(len - 1, 0), remainder),
    Mux(divZeroReg, Fill(len, 1.U(1.W)), quotient)
  )
  // isW: the low 32 bits of the result are extended to len bits via SignExt.
  io.out_data := Mux(isW,
    SignExt(res(31, 0), len),
    res
  )
  io.in_ready := state === s_idle
  io.out_valid := finished // state === s_finish
}

/** Function-unit wrapper around [[SRT4DividerDataModule]].
  *
  * It registers the uop and control fields of the accepted request, derives the
  * two kill signals from the redirect/flush inputs, and wires the data module
  * to the function-unit IO.
  *
  * `io`, `ctrl` and `sign` are not defined in this file; presumably they are
  * inherited from `AbstractDivider` — TODO confirm.
  *
  * @param len operand width in bits
  */
class SRT4Divider(len: Int)(implicit p: Parameters) extends AbstractDivider(len) {

  val newReq = io.in.fire()

  val uop = io.in.bits.uop
  // Captured on acceptance so they stay stable while the division is running.
  val uopReg = RegEnable(uop, newReq)
  val ctrlReg = RegEnable(ctrl, newReq)

  val divDataModule = Module(new SRT4DividerDataModule(len))

  // kill_w: the incoming uop is flushed in the cycle it is presented.
  val kill_w = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
  // kill_r: the uop already accepted (data module not idle) is flushed.
  val kill_r = !divDataModule.io.in_ready && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)

  divDataModule.io.src(0) := io.in.bits.src(0)
  divDataModule.io.src(1) := io.in.bits.src(1)
  divDataModule.io.valid := io.in.valid
  divDataModule.io.sign := sign
  divDataModule.io.kill_w := kill_w
  divDataModule.io.kill_r := kill_r
  // Result selection uses the registered control, not the live input.
  divDataModule.io.isHi := ctrlReg.isHi
  divDataModule.io.isW := ctrlReg.isW
  divDataModule.io.out_ready := io.out.ready

  io.in.ready := divDataModule.io.in_ready
  io.out.valid := divDataModule.io.out_valid
  io.out.bits.data := divDataModule.io.out_data
  io.out.bits.uop := uopReg
}