; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify we fold loads into unary sse intrinsics only when optimizing for size.
;
; Every function below has the same shape: load a scalar, insert it into lane 0
; of an undef vector, call the scalar intrinsic, and return lane 0 of the result.
; The first four functions carry no size attribute; the last four are marked
; optsize.

; --- Default optimization: the load is expected to stay a separate scalar move
; --- and the unary operation is expected to be register-to-register.

define float @rcpss(float* %a) {
; SSE-LABEL: rcpss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rcpss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @rsqrtss(float* %a) {
; SSE-LABEL: rsqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rsqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @sqrtss(float* %a) {
; SSE-LABEL: sqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define double @sqrtsd(double* %a) {
; SSE-LABEL: sqrtsd:
; SSE:       # BB#0:
; SSE-NEXT:    movsd (%rdi), %xmm0
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd (%rdi), %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}

; --- optsize: the load is expected to be folded into the memory operand of the
; --- unary operation, so no separate scalar move is emitted.

define float @rcpss_size(float* %a) optsize {
; SSE-LABEL: rcpss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rcpss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @rsqrtss_size(float* %a) optsize {
; SSE-LABEL: rsqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define float @sqrtss_size(float* %a) optsize {
; SSE-LABEL: sqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

define double @sqrtsd_size(double* %a) optsize {
; SSE-LABEL: sqrtsd_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}

; Scalar x86 intrinsics exercised by the functions above.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone