// Note: Floating point operations must follow IEEE 754 rules, using round-to-nearest and gradual
// underflow, except where stated otherwise.

//
// floating-point comparators vAA, vBB, vCC
// Note: Perform the indicated floating point comparison, setting vAA to 0 if vBB == vCC, 1 if
// vBB > vCC, or -1 if vBB < vCC. The "bias" listed indicates how NaN comparisons are treated:
// "gt bias" instructions return 1 for NaN comparisons, and "lt bias" instructions return -1.
//

// cmpl-float vAA, vBB, vCC
// Format 23x: AA|2d CC|BB
// LT bias, if NaN then vAA := -1
//
// is_double: selects the .d compare instructions instead of .s and is forwarded to the vreg
//            reads; op_cmpl_double reuses this handler with is_double=True.
// Clobbers: ft1, ft2, t0, t1, t2
%def op_cmpl_float(is_double=False):
    FETCH t1, count=1     // t1 := CC|BB
    srliw t0, xINST, 8    // t0 := AA
    srliw t2, t1, 8       // t2 := CC
    andi t1, t1, 0xFF     // t1 := BB
%  get_vreg_float("ft1", "t1", is_double=is_double)  # ft1 := fp[BB]
%  get_vreg_float("ft2", "t2", is_double=is_double)  # ft2 := fp[CC]
    // Note: Formula "((FLE r,l) - 1) + (FLT r,l)" lifted from compiler.
%  precision = "d" if is_double else "s"
    fle.${precision} t1, ft2, ft1  // t1 := (fp[CC] <= fp[BB]), 0 if either operand is NaN
    flt.${precision} t2, ft2, ft1  // t2 := (fp[CC] < fp[BB]), 0 if either operand is NaN
    addi t1, t1, -1                // t1 := t1 - 1, i.e. 0 or -1
    add t2, t2, t1                 // t2 := 1, 0 or -1; a NaN operand yields -1
    FETCH_ADVANCE_INST 2
%  set_vreg("t2", "t0", z0="t1")  # fp[AA] := result
    GET_INST_OPCODE t0
    GOTO_OPCODE t0

// cmpg-float vAA, vBB, vCC
// Format 23x: AA|2e CC|BB
// GT bias, if NaN then vAA := 1
//
// is_double: selects the .d compare instructions instead of .s and is forwarded to the vreg
//            reads; op_cmpg_double reuses this handler with is_double=True.
// Clobbers: ft1, ft2, t0, t1, t2
%def op_cmpg_float(is_double=False):
    FETCH t1, count=1     // t1 := CC|BB
    srliw t0, xINST, 8    // t0 := AA
    srliw t2, t1, 8       // t2 := CC
    andi t1, t1, 0xFF     // t1 := BB
%  get_vreg_float("ft1", "t1", is_double=is_double)  # ft1 := fp[BB]
%  get_vreg_float("ft2", "t2", is_double=is_double)  # ft2 := fp[CC]
    // Note: Formula "((FLE l,r) ^ 1) - (FLT l,r)" lifted from compiler.
%  precision = "d" if is_double else "s"
    fle.${precision} t1, ft1, ft2  // t1 := (fp[BB] <= fp[CC]), 0 if either operand is NaN
    flt.${precision} t2, ft1, ft2  // t2 := (fp[BB] < fp[CC]), 0 if either operand is NaN
    xori t1, t1, 1                 // t1 := 1 if fp[BB] > fp[CC] or either operand is NaN
    sub t2, t1, t2                 // t2 := 1, 0 or -1; a NaN operand yields 1
    FETCH_ADVANCE_INST 2
%  set_vreg("t2", "t0", z0="t1")  # fp[AA] := result
    GET_INST_OPCODE t0
    GOTO_OPCODE t0

// cmpl-double vAA, vBB, vCC
// Format 23x: AA|2f CC|BB
// LT bias, if NaN then vAA := -1
%def op_cmpl_double():
%  op_cmpl_float(is_double=True)

// cmpg-double vAA, vBB, vCC
// Format 23x: AA|30 CC|BB
// Note: Formula "((FLE l,r) ^ 1) - (FLT l,r)" lifted from compiler.
// GT bias, if NaN then vAA := 1
%def op_cmpg_double():
%  op_cmpg_float(is_double=True)

//
// funop vA, vB
// Format 12x: B|A|op
//

// neg-float vA, vB
// Format 12x: B|A|7f
%def op_neg_float():
%  generic_funop(instr="fneg.s ft0, ft0", dst="s", src="s")

// neg-double vA, vB
// Format 12x: B|A|80
%def op_neg_double():
%  generic_funop(instr="fneg.d ft0, ft0", dst="d", src="d")

// int-to-float vA, vB
// Format 12x: B|A|82
// Note: Conversion of int32 to float, using round-to-nearest. This loses precision for some values.
// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
%def op_int_to_float():
%  generic_funop(instr="fcvt.s.w ft0, t1, rne", dst="s", src="w")

// int-to-double vA, vB
// Format 12x: B|A|83
// Note: Conversion of int32 to double.
%def op_int_to_double():
%  generic_funop(instr="fcvt.d.w ft0, t1", dst="d", src="w")

// long-to-float vA, vB
// Format 12x: B|A|85
// Note: Conversion of int64 to float, using round-to-nearest. This loses precision for some values.
// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
%def op_long_to_float():
%  generic_funop(instr="fcvt.s.l ft0, t1, rne", dst="s", src="l")

// long-to-double vA, vB
// Format 12x: B|A|86
// Note: Conversion of int64 to double, using round-to-nearest. This loses precision for some values.
// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
%def op_long_to_double():
%  generic_funop(instr="fcvt.d.l ft0, t1, rne", dst="d", src="l")

// float-to-int vA, vB
// Format 12x: B|A|87
// Note: Conversion of float to int32, using round-toward-zero. NaN and -0.0 (negative zero)
// convert to the integer 0. Infinities and values with too large a magnitude to be represented
// get converted to either 0x7fffffff or -0x80000000 depending on sign.
//
// FCVT.W.S RTZ has the following behavior:
// - NaN rounds to 0x7fffffff - requires check and set to zero.
// - negative zero rounds to zero - matches dex spec.
// - pos inf rounds to 0x7fffffff - matches dex spec.
// - neg inf rounds to 0x80000000 - matches dex spec.
%def op_float_to_int():
%  generic_funop(instr="fcvt.w.s t1, ft0, rtz", dst="w", src="s", nan_zeroed=True)

// float-to-long vA, vB
// Format 12x: B|A|88
// Note: Conversion of float to int64, using round-toward-zero. The same special case rules as for
// float-to-int apply here, except that out-of-range values get converted to either
// 0x7fffffffffffffff or -0x8000000000000000 depending on sign.
//
// FCVT.L.S RTZ has the following behavior:
// - NaN rounds to 0x7fffffffffffffff - requires check and set to zero.
// - negative zero rounds to zero - matches dex spec.
// - pos inf rounds to 0x7fffffffffffffff - matches dex spec.
// - neg inf rounds to 0x8000000000000000 - matches dex spec.
%def op_float_to_long():
%  generic_funop(instr="fcvt.l.s t1, ft0, rtz", dst="l", src="s", nan_zeroed=True)

// float-to-double vA, vB
// Format 12x: B|A|89
// Note: Conversion of float to double, preserving the value exactly.
%def op_float_to_double():
%  generic_funop(instr="fcvt.d.s ft0, ft0", dst="d", src="s")

// double-to-int vA, vB
// Format 12x: B|A|8a
// Note: Conversion of double to int32, using round-toward-zero. The same special case rules as for
// float-to-int apply here.
%def op_double_to_int():
%  generic_funop(instr="fcvt.w.d t1, ft0, rtz", dst="w", src="d", nan_zeroed=True)

// double-to-long vA, vB
// Format 12x: B|A|8b
// Note: Conversion of double to int64, using round-toward-zero. The same special case rules as for
// float-to-long apply here.
%def op_double_to_long():
%  generic_funop(instr="fcvt.l.d t1, ft0, rtz", dst="l", src="d", nan_zeroed=True)

// double-to-float vA, vB
// Format 12x: B|A|8c
// Note: Conversion of double to float, using round-to-nearest. This loses precision for some values.
// Note: For ties, the IEEE 754-2008 standard defaults to "roundTiesToEven" for binary floats.
%def op_double_to_float():
%  generic_funop(instr="fcvt.s.d ft0, ft0, rne", dst="s", src="d")

// unop boilerplate
// instr: operand held in t1 or ft0, result written to t1 or ft0.
//        instr must not clobber t2.
// dst: one of w (int32), l (int64), s (float), d (double)
// src: one of w (int32), l (int64), s (float), d (double)
// nan_zeroed: when True, the operand in ft0 is classified first and instr is skipped for a NaN,
//             leaving t1 == 0 as the result. FCLASS reports a NaN in bit 8 (signaling) or bit 9
//             (quiet), so only a NaN produces a class value of 0x100 or more. The callers in this
//             file pass it only with src s or d and dst w or l.
// Any other dst or src value fails template generation with an assertion naming the value.
// Clobbers: ft0, t0, t1, t2
%def generic_funop(instr, dst, src, nan_zeroed=False):
    srliw t0, xINST, 12       // t0 := B
    srliw t2, xINST, 8        // t2 := B|A

%  if src == "w":
%    get_vreg("t1", "t0")     # t1 := fp[B]
%  elif src == "l":
    GET_VREG_WIDE t1, t0      // t1 := fp[B]
%  elif src == "s":
%    get_vreg_float("ft0", "t0")  # ft0 := fp[B]
%  elif src == "d":
    GET_VREG_DOUBLE ft0, t0   // ft0 := fp[B]
%  else:
%    assert False, src
%#:
    and t2, t2, 0xF           // t2 := A
    FETCH_ADVANCE_INST 1      // advance xPC, load xINST
%  if nan_zeroed:
    // Okay to clobber T1. It is not read if nan_zeroed=True.
    fclass.${src} t1, ft0     // fclass.s or fclass.d on the source register ft0
    sltiu t1, t1, 0x100       // t1 := 0 if NaN, per dex spec. Skip the conversion.
    beqz t1, 1f
%#:
    $instr                    // read operand (from t1|ft0), write result (to t1|ft0)
                              // do not clobber t2!
1:

%  if dst == "w":
%    set_vreg("t1", "t2", z0="t0")  # fp[A] := t1
%  elif dst == "l":
    SET_VREG_WIDE t1, t2, z0=t0     // fp[A] := t1
%  elif dst == "s":
%    set_vreg_float("ft0", "t2", z0="t0")  # fp[A] := ft0
%  elif dst == "d":
    SET_VREG_DOUBLE ft0, t2, z0=t0  // fp[A] := ft0
%  else:
%    assert False, dst
%#:

    GET_INST_OPCODE t0        // t0 holds next opcode
    GOTO_OPCODE t0            // continue to next

//
// fbinop vAA, vBB, vCC
// Format 23x: AA|op CC|BB
//

// add-float vAA, vBB, vCC
// Format 23x: AA|a6 CC|BB
%def op_add_float():
%  generic_fbinop(instr="fadd.s fa0, fa0, fa1, rne")

// sub-float vAA, vBB, vCC
// Format 23x: AA|a7 CC|BB
%def op_sub_float():
%  generic_fbinop(instr="fsub.s fa0, fa0, fa1, rne")

// mul-float vAA, vBB, vCC
// Format 23x: AA|a8 CC|BB
%def op_mul_float():
%  generic_fbinop(instr="fmul.s fa0, fa0, fa1, rne")

// div-float vAA, vBB, vCC
// Format 23x: AA|a9 CC|BB
%def op_div_float():
%  generic_fbinop(instr="fdiv.s fa0, fa0, fa1, rne")

// rem-float vAA, vBB, vCC
// Format 23x: AA|aa CC|BB
// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
// and is defined as result == a - roundTowardZero(a / b) * b.
// Note: RISC-V does not offer floating point remainder; use fmodf in libm.
%def op_rem_float():
%  generic_fbinop(instr="call fmodf")

// add-double vAA, vBB, vCC
// Format 23x: AA|ab CC|BB
%def op_add_double():
%  generic_fbinop(instr="fadd.d fa0, fa0, fa1, rne", is_double=True)

// sub-double vAA, vBB, vCC
// Format 23x: AA|ac CC|BB
%def op_sub_double():
%  generic_fbinop(instr="fsub.d fa0, fa0, fa1, rne", is_double=True)

// mul-double vAA, vBB, vCC
// Format 23x: AA|ad CC|BB
%def op_mul_double():
%  generic_fbinop(instr="fmul.d fa0, fa0, fa1, rne", is_double=True)

// div-double vAA, vBB, vCC
// Format 23x: AA|ae CC|BB
%def op_div_double():
%  generic_fbinop(instr="fdiv.d fa0, fa0, fa1, rne", is_double=True)

// rem-double vAA, vBB, vCC
// Format 23x: AA|af CC|BB
// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
// and is defined as result == a - roundTowardZero(a / b) * b.
// Note: RISC-V does not offer floating point remainder; use fmod in libm.
%def op_rem_double():
%  generic_fbinop(instr="call fmod", is_double=True)

// fbinop boilerplate
// instr: operands held in fa0 and fa1, result written to fa0
// instr may be a libm call, so:
// - avoid caller-save state across instr; s11 is used instead.
// - fa0 and fa1 are used instead of ft0 and ft1.
//
// The is_double flag ensures vregs are read and written in 64-bit widths.
// Clobbers: t0, t1, fa0, fa1, s11
%def generic_fbinop(instr, is_double=False):
    FETCH t0, count=1         // t0 := CC|BB
    srliw s11, xINST, 8       // s11 := AA
    srliw t1, t0, 8           // t1 := CC
    and t0, t0, 0xFF          // t0 := BB
%  get_vreg_float("fa1", "t1", is_double=is_double)
                              // fa1 := fp[CC]
%  get_vreg_float("fa0", "t0", is_double=is_double)
                              // fa0 := fp[BB]
    FETCH_ADVANCE_INST 2      // advance xPC, load xINST
    $instr                    // read fa0 and fa1, write result to fa0.
                              // instr may be a function call.
%  set_vreg_float("fa0", "s11", z0="t0", is_double=is_double)
                              // fp[AA] := fa0
    GET_INST_OPCODE t0        // t0 holds next opcode
    GOTO_OPCODE t0            // continue to next

//
// fbinop/2addr vA, vB
// Format 12x: B|A|op
// Note: The arithmetic instrs name the rne rounding mode explicitly, as the three-register
// forms do, so the result does not depend on the dynamic rounding mode.
//

// add-float/2addr vA, vB
// Format 12x: B|A|c6
%def op_add_float_2addr():
%  generic_fbinop_2addr(instr="fadd.s fa0, fa0, fa1, rne")

// sub-float/2addr vA, vB
// Format 12x: B|A|c7
%def op_sub_float_2addr():
%  generic_fbinop_2addr(instr="fsub.s fa0, fa0, fa1, rne")

// mul-float/2addr vA, vB
// Format 12x: B|A|c8
%def op_mul_float_2addr():
%  generic_fbinop_2addr(instr="fmul.s fa0, fa0, fa1, rne")

// div-float/2addr vA, vB
// Format 12x: B|A|c9
%def op_div_float_2addr():
%  generic_fbinop_2addr(instr="fdiv.s fa0, fa0, fa1, rne")

// rem-float/2addr vA, vB
// Format 12x: B|A|ca
// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
// and is defined as result == a - roundTowardZero(a / b) * b.
// Note: RISC-V does not offer floating point remainder; use fmodf in libm.
%def op_rem_float_2addr():
%  generic_fbinop_2addr(instr="call fmodf")

// add-double/2addr vA, vB
// Format 12x: B|A|cb
%def op_add_double_2addr():
%  generic_fbinop_2addr(instr="fadd.d fa0, fa0, fa1, rne", is_double=True)

// sub-double/2addr vA, vB
// Format 12x: B|A|cc
%def op_sub_double_2addr():
%  generic_fbinop_2addr(instr="fsub.d fa0, fa0, fa1, rne", is_double=True)

// mul-double/2addr vA, vB
// Format 12x: B|A|cd
%def op_mul_double_2addr():
%  generic_fbinop_2addr(instr="fmul.d fa0, fa0, fa1, rne", is_double=True)

// div-double/2addr vA, vB
// Format 12x: B|A|ce
%def op_div_double_2addr():
%  generic_fbinop_2addr(instr="fdiv.d fa0, fa0, fa1, rne", is_double=True)

// rem-double/2addr vA, vB
// Format 12x: B|A|cf
// Note: Floating point remainder after division. This function is different than IEEE 754 remainder
// and is defined as result == a - roundTowardZero(a / b) * b.
// Note: RISC-V does not offer floating point remainder; use fmod in libm.
%def op_rem_double_2addr():
%  generic_fbinop_2addr(instr="call fmod", is_double=True)

// fbinop/2addr boilerplate
// instr: operands held in fa0 and fa1, result written to fa0
// instr may be a libm call, so:
// - avoid caller-save state across instr; s11 is used instead.
// - use fa0 and fa1 instead of ft0 and ft1.
// - the next opcode is loaded into t1 only after instr has run.
//
// The is_double flag ensures vregs are read and written in 64-bit widths.
// Clobbers: t0, t1, fa0, fa1, s11
%def generic_fbinop_2addr(instr, is_double=False):
    srliw t0, xINST, 8        // t0 := B|A
    srliw t1, xINST, 12       // t1 := B
    and t0, t0, 0xF           // t0 := A
%  get_vreg_float("fa1", "t1", is_double=is_double)
                              // fa1 := fp[B]
    mv s11, t0                // s11 := A
%  get_vreg_float("fa0", "t0", is_double=is_double)
                              // fa0 := fp[A]
    FETCH_ADVANCE_INST 1      // advance xPC, load xINST
    $instr                    // read fa0 and fa1, write result to fa0.
                              // instr may be a function call.
    GET_INST_OPCODE t1        // t1 holds next opcode
%  set_vreg_float("fa0", "s11", z0="t0", is_double=is_double)
                              // fp[A] := fa0
    GOTO_OPCODE t1            // continue to next