//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a fairly generic conversion from a wider to a narrower
// IEEE-754 floating-point type in the default (round to nearest, ties to even)
// rounding mode.  The constants and types defined following the includes below
// parameterize the conversion.
//
// This routine can be trivially adapted to support conversions to
// half-precision or from quad-precision. It does not support types that don't
// use the usual IEEE-754 interchange formats; specifically, some work would be
// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
// double-double format.
//
// Note please, however, that this implementation is only intended to support
// *narrowing* operations; if you need to convert to a *wider* floating-point
// type (e.g. float -> double), then this routine will not do what you want it
// to.
25*7c3d14c8STreehugger Robot// 26*7c3d14c8STreehugger Robot// It also requires that integer types at least as large as both formats 27*7c3d14c8STreehugger Robot// are available on the target platform; this may pose a problem when trying 28*7c3d14c8STreehugger Robot// to add support for quad on some 32-bit systems, for example. 29*7c3d14c8STreehugger Robot// 30*7c3d14c8STreehugger Robot// Finally, the following assumptions are made: 31*7c3d14c8STreehugger Robot// 32*7c3d14c8STreehugger Robot// 1. floating-point types and integer types have the same endianness on the 33*7c3d14c8STreehugger Robot// target platform 34*7c3d14c8STreehugger Robot// 35*7c3d14c8STreehugger Robot// 2. quiet NaNs, if supported, are indicated by the leading bit of the 36*7c3d14c8STreehugger Robot// significand field being set 37*7c3d14c8STreehugger Robot// 38*7c3d14c8STreehugger Robot//===----------------------------------------------------------------------===// 39*7c3d14c8STreehugger Robot 40*7c3d14c8STreehugger Robot#include "fp_trunc.h" 41*7c3d14c8STreehugger Robot 42*7c3d14c8STreehugger Robotstatic __inline dst_t __truncXfYf2__(src_t a) { 43*7c3d14c8STreehugger Robot // Various constants whose values follow from the type parameters. 44*7c3d14c8STreehugger Robot // Any reasonable optimizer will fold and propagate all of these. 
45*7c3d14c8STreehugger Robot const int srcBits = sizeof(src_t)*CHAR_BIT; 46*7c3d14c8STreehugger Robot const int srcExpBits = srcBits - srcSigBits - 1; 47*7c3d14c8STreehugger Robot const int srcInfExp = (1 << srcExpBits) - 1; 48*7c3d14c8STreehugger Robot const int srcExpBias = srcInfExp >> 1; 49*7c3d14c8STreehugger Robot 50*7c3d14c8STreehugger Robot const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; 51*7c3d14c8STreehugger Robot const src_rep_t srcSignificandMask = srcMinNormal - 1; 52*7c3d14c8STreehugger Robot const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; 53*7c3d14c8STreehugger Robot const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); 54*7c3d14c8STreehugger Robot const src_rep_t srcAbsMask = srcSignMask - 1; 55*7c3d14c8STreehugger Robot const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; 56*7c3d14c8STreehugger Robot const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); 57*7c3d14c8STreehugger Robot const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); 58*7c3d14c8STreehugger Robot const src_rep_t srcNaNCode = srcQNaN - 1; 59*7c3d14c8STreehugger Robot 60*7c3d14c8STreehugger Robot const int dstBits = sizeof(dst_t)*CHAR_BIT; 61*7c3d14c8STreehugger Robot const int dstExpBits = dstBits - dstSigBits - 1; 62*7c3d14c8STreehugger Robot const int dstInfExp = (1 << dstExpBits) - 1; 63*7c3d14c8STreehugger Robot const int dstExpBias = dstInfExp >> 1; 64*7c3d14c8STreehugger Robot 65*7c3d14c8STreehugger Robot const int underflowExponent = srcExpBias + 1 - dstExpBias; 66*7c3d14c8STreehugger Robot const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; 67*7c3d14c8STreehugger Robot const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; 68*7c3d14c8STreehugger Robot const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; 69*7c3d14c8STreehugger Robot 70*7c3d14c8STreehugger Robot const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); 
71*7c3d14c8STreehugger Robot const dst_rep_t dstNaNCode = dstQNaN - 1; 72*7c3d14c8STreehugger Robot 73*7c3d14c8STreehugger Robot // Break a into a sign and representation of the absolute value 74*7c3d14c8STreehugger Robot const src_rep_t aRep = srcToRep(a); 75*7c3d14c8STreehugger Robot const src_rep_t aAbs = aRep & srcAbsMask; 76*7c3d14c8STreehugger Robot const src_rep_t sign = aRep & srcSignMask; 77*7c3d14c8STreehugger Robot dst_rep_t absResult; 78*7c3d14c8STreehugger Robot 79*7c3d14c8STreehugger Robot if (aAbs - underflow < aAbs - overflow) { 80*7c3d14c8STreehugger Robot // The exponent of a is within the range of normal numbers in the 81*7c3d14c8STreehugger Robot // destination format. We can convert by simply right-shifting with 82*7c3d14c8STreehugger Robot // rounding and adjusting the exponent. 83*7c3d14c8STreehugger Robot absResult = aAbs >> (srcSigBits - dstSigBits); 84*7c3d14c8STreehugger Robot absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; 85*7c3d14c8STreehugger Robot 86*7c3d14c8STreehugger Robot const src_rep_t roundBits = aAbs & roundMask; 87*7c3d14c8STreehugger Robot // Round to nearest 88*7c3d14c8STreehugger Robot if (roundBits > halfway) 89*7c3d14c8STreehugger Robot absResult++; 90*7c3d14c8STreehugger Robot // Ties to even 91*7c3d14c8STreehugger Robot else if (roundBits == halfway) 92*7c3d14c8STreehugger Robot absResult += absResult & 1; 93*7c3d14c8STreehugger Robot } 94*7c3d14c8STreehugger Robot else if (aAbs > srcInfinity) { 95*7c3d14c8STreehugger Robot // a is NaN. 96*7c3d14c8STreehugger Robot // Conjure the result by beginning with infinity, setting the qNaN 97*7c3d14c8STreehugger Robot // bit and inserting the (truncated) trailing NaN field. 
98*7c3d14c8STreehugger Robot absResult = (dst_rep_t)dstInfExp << dstSigBits; 99*7c3d14c8STreehugger Robot absResult |= dstQNaN; 100*7c3d14c8STreehugger Robot absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; 101*7c3d14c8STreehugger Robot } 102*7c3d14c8STreehugger Robot else if (aAbs >= overflow) { 103*7c3d14c8STreehugger Robot // a overflows to infinity. 104*7c3d14c8STreehugger Robot absResult = (dst_rep_t)dstInfExp << dstSigBits; 105*7c3d14c8STreehugger Robot } 106*7c3d14c8STreehugger Robot else { 107*7c3d14c8STreehugger Robot // a underflows on conversion to the destination type or is an exact 108*7c3d14c8STreehugger Robot // zero. The result may be a denormal or zero. Extract the exponent 109*7c3d14c8STreehugger Robot // to get the shift amount for the denormalization. 110*7c3d14c8STreehugger Robot const int aExp = aAbs >> srcSigBits; 111*7c3d14c8STreehugger Robot const int shift = srcExpBias - dstExpBias - aExp + 1; 112*7c3d14c8STreehugger Robot 113*7c3d14c8STreehugger Robot const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; 114*7c3d14c8STreehugger Robot 115*7c3d14c8STreehugger Robot // Right shift by the denormalization amount with sticky. 
116*7c3d14c8STreehugger Robot if (shift > srcSigBits) { 117*7c3d14c8STreehugger Robot absResult = 0; 118*7c3d14c8STreehugger Robot } else { 119*7c3d14c8STreehugger Robot const bool sticky = significand << (srcBits - shift); 120*7c3d14c8STreehugger Robot src_rep_t denormalizedSignificand = significand >> shift | sticky; 121*7c3d14c8STreehugger Robot absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); 122*7c3d14c8STreehugger Robot const src_rep_t roundBits = denormalizedSignificand & roundMask; 123*7c3d14c8STreehugger Robot // Round to nearest 124*7c3d14c8STreehugger Robot if (roundBits > halfway) 125*7c3d14c8STreehugger Robot absResult++; 126*7c3d14c8STreehugger Robot // Ties to even 127*7c3d14c8STreehugger Robot else if (roundBits == halfway) 128*7c3d14c8STreehugger Robot absResult += absResult & 1; 129*7c3d14c8STreehugger Robot } 130*7c3d14c8STreehugger Robot } 131*7c3d14c8STreehugger Robot 132*7c3d14c8STreehugger Robot // Apply the signbit to (dst_t)abs(a). 133*7c3d14c8STreehugger Robot const dst_rep_t result = absResult | sign >> (srcBits - dstBits); 134*7c3d14c8STreehugger Robot return dstFromRep(result); 135*7c3d14c8STreehugger Robot} 136