1*67e74705SXin Li //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This contains code to emit Builtin calls as LLVM code.
11*67e74705SXin Li //
12*67e74705SXin Li //===----------------------------------------------------------------------===//
13*67e74705SXin Li
14*67e74705SXin Li #include "CodeGenFunction.h"
15*67e74705SXin Li #include "CGCXXABI.h"
16*67e74705SXin Li #include "CGObjCRuntime.h"
17*67e74705SXin Li #include "CodeGenModule.h"
18*67e74705SXin Li #include "TargetInfo.h"
19*67e74705SXin Li #include "clang/AST/ASTContext.h"
20*67e74705SXin Li #include "clang/AST/Decl.h"
21*67e74705SXin Li #include "clang/Basic/TargetBuiltins.h"
22*67e74705SXin Li #include "clang/Basic/TargetInfo.h"
23*67e74705SXin Li #include "clang/CodeGen/CGFunctionInfo.h"
24*67e74705SXin Li #include "llvm/ADT/StringExtras.h"
25*67e74705SXin Li #include "llvm/IR/CallSite.h"
26*67e74705SXin Li #include "llvm/IR/DataLayout.h"
27*67e74705SXin Li #include "llvm/IR/InlineAsm.h"
28*67e74705SXin Li #include "llvm/IR/Intrinsics.h"
29*67e74705SXin Li #include "llvm/IR/MDBuilder.h"
30*67e74705SXin Li #include <sstream>
31*67e74705SXin Li
32*67e74705SXin Li using namespace clang;
33*67e74705SXin Li using namespace CodeGen;
34*67e74705SXin Li using namespace llvm;
35*67e74705SXin Li
36*67e74705SXin Li /// getBuiltinLibFunction - Given a builtin id for a function like
37*67e74705SXin Li /// "__builtin_fabsf", return a Function* for "fabsf".
getBuiltinLibFunction(const FunctionDecl * FD,unsigned BuiltinID)38*67e74705SXin Li llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
39*67e74705SXin Li unsigned BuiltinID) {
40*67e74705SXin Li assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
41*67e74705SXin Li
42*67e74705SXin Li // Get the name, skip over the __builtin_ prefix (if necessary).
43*67e74705SXin Li StringRef Name;
44*67e74705SXin Li GlobalDecl D(FD);
45*67e74705SXin Li
46*67e74705SXin Li // If the builtin has been declared explicitly with an assembler label,
47*67e74705SXin Li // use the mangled name. This differs from the plain label on platforms
48*67e74705SXin Li // that prefix labels.
49*67e74705SXin Li if (FD->hasAttr<AsmLabelAttr>())
50*67e74705SXin Li Name = getMangledName(D);
51*67e74705SXin Li else
52*67e74705SXin Li Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
53*67e74705SXin Li
54*67e74705SXin Li llvm::FunctionType *Ty =
55*67e74705SXin Li cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
56*67e74705SXin Li
57*67e74705SXin Li return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
58*67e74705SXin Li }
59*67e74705SXin Li
60*67e74705SXin Li /// Emit the conversions required to turn the given value into an
61*67e74705SXin Li /// integer of the given size.
EmitToInt(CodeGenFunction & CGF,llvm::Value * V,QualType T,llvm::IntegerType * IntType)62*67e74705SXin Li static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
63*67e74705SXin Li QualType T, llvm::IntegerType *IntType) {
64*67e74705SXin Li V = CGF.EmitToMemory(V, T);
65*67e74705SXin Li
66*67e74705SXin Li if (V->getType()->isPointerTy())
67*67e74705SXin Li return CGF.Builder.CreatePtrToInt(V, IntType);
68*67e74705SXin Li
69*67e74705SXin Li assert(V->getType() == IntType);
70*67e74705SXin Li return V;
71*67e74705SXin Li }
72*67e74705SXin Li
EmitFromInt(CodeGenFunction & CGF,llvm::Value * V,QualType T,llvm::Type * ResultType)73*67e74705SXin Li static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
74*67e74705SXin Li QualType T, llvm::Type *ResultType) {
75*67e74705SXin Li V = CGF.EmitFromMemory(V, T);
76*67e74705SXin Li
77*67e74705SXin Li if (ResultType->isPointerTy())
78*67e74705SXin Li return CGF.Builder.CreateIntToPtr(V, ResultType);
79*67e74705SXin Li
80*67e74705SXin Li assert(V->getType() == ResultType);
81*67e74705SXin Li return V;
82*67e74705SXin Li }
83*67e74705SXin Li
84*67e74705SXin Li /// Utility to insert an atomic instruction based on Instrinsic::ID
85*67e74705SXin Li /// and the expression node.
MakeBinaryAtomicValue(CodeGenFunction & CGF,llvm::AtomicRMWInst::BinOp Kind,const CallExpr * E)86*67e74705SXin Li static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
87*67e74705SXin Li llvm::AtomicRMWInst::BinOp Kind,
88*67e74705SXin Li const CallExpr *E) {
89*67e74705SXin Li QualType T = E->getType();
90*67e74705SXin Li assert(E->getArg(0)->getType()->isPointerType());
91*67e74705SXin Li assert(CGF.getContext().hasSameUnqualifiedType(T,
92*67e74705SXin Li E->getArg(0)->getType()->getPointeeType()));
93*67e74705SXin Li assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
94*67e74705SXin Li
95*67e74705SXin Li llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
96*67e74705SXin Li unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
97*67e74705SXin Li
98*67e74705SXin Li llvm::IntegerType *IntType =
99*67e74705SXin Li llvm::IntegerType::get(CGF.getLLVMContext(),
100*67e74705SXin Li CGF.getContext().getTypeSize(T));
101*67e74705SXin Li llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
102*67e74705SXin Li
103*67e74705SXin Li llvm::Value *Args[2];
104*67e74705SXin Li Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
105*67e74705SXin Li Args[1] = CGF.EmitScalarExpr(E->getArg(1));
106*67e74705SXin Li llvm::Type *ValueType = Args[1]->getType();
107*67e74705SXin Li Args[1] = EmitToInt(CGF, Args[1], T, IntType);
108*67e74705SXin Li
109*67e74705SXin Li llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
110*67e74705SXin Li Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
111*67e74705SXin Li return EmitFromInt(CGF, Result, T, ValueType);
112*67e74705SXin Li }
113*67e74705SXin Li
EmitNontemporalStore(CodeGenFunction & CGF,const CallExpr * E)114*67e74705SXin Li static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115*67e74705SXin Li Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116*67e74705SXin Li Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117*67e74705SXin Li
118*67e74705SXin Li // Convert the type of the pointer to a pointer to the stored type.
119*67e74705SXin Li Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120*67e74705SXin Li Value *BC = CGF.Builder.CreateBitCast(
121*67e74705SXin Li Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122*67e74705SXin Li LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123*67e74705SXin Li LV.setNontemporal(true);
124*67e74705SXin Li CGF.EmitStoreOfScalar(Val, LV, false);
125*67e74705SXin Li return nullptr;
126*67e74705SXin Li }
127*67e74705SXin Li
EmitNontemporalLoad(CodeGenFunction & CGF,const CallExpr * E)128*67e74705SXin Li static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129*67e74705SXin Li Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130*67e74705SXin Li
131*67e74705SXin Li LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132*67e74705SXin Li LV.setNontemporal(true);
133*67e74705SXin Li return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134*67e74705SXin Li }
135*67e74705SXin Li
EmitBinaryAtomic(CodeGenFunction & CGF,llvm::AtomicRMWInst::BinOp Kind,const CallExpr * E)136*67e74705SXin Li static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137*67e74705SXin Li llvm::AtomicRMWInst::BinOp Kind,
138*67e74705SXin Li const CallExpr *E) {
139*67e74705SXin Li return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140*67e74705SXin Li }
141*67e74705SXin Li
142*67e74705SXin Li /// Utility to insert an atomic instruction based Instrinsic::ID and
143*67e74705SXin Li /// the expression node, where the return value is the result of the
144*67e74705SXin Li /// operation.
EmitBinaryAtomicPost(CodeGenFunction & CGF,llvm::AtomicRMWInst::BinOp Kind,const CallExpr * E,Instruction::BinaryOps Op,bool Invert=false)145*67e74705SXin Li static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146*67e74705SXin Li llvm::AtomicRMWInst::BinOp Kind,
147*67e74705SXin Li const CallExpr *E,
148*67e74705SXin Li Instruction::BinaryOps Op,
149*67e74705SXin Li bool Invert = false) {
150*67e74705SXin Li QualType T = E->getType();
151*67e74705SXin Li assert(E->getArg(0)->getType()->isPointerType());
152*67e74705SXin Li assert(CGF.getContext().hasSameUnqualifiedType(T,
153*67e74705SXin Li E->getArg(0)->getType()->getPointeeType()));
154*67e74705SXin Li assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155*67e74705SXin Li
156*67e74705SXin Li llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157*67e74705SXin Li unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158*67e74705SXin Li
159*67e74705SXin Li llvm::IntegerType *IntType =
160*67e74705SXin Li llvm::IntegerType::get(CGF.getLLVMContext(),
161*67e74705SXin Li CGF.getContext().getTypeSize(T));
162*67e74705SXin Li llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163*67e74705SXin Li
164*67e74705SXin Li llvm::Value *Args[2];
165*67e74705SXin Li Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166*67e74705SXin Li llvm::Type *ValueType = Args[1]->getType();
167*67e74705SXin Li Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168*67e74705SXin Li Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169*67e74705SXin Li
170*67e74705SXin Li llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
171*67e74705SXin Li Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
172*67e74705SXin Li Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
173*67e74705SXin Li if (Invert)
174*67e74705SXin Li Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
175*67e74705SXin Li llvm::ConstantInt::get(IntType, -1));
176*67e74705SXin Li Result = EmitFromInt(CGF, Result, T, ValueType);
177*67e74705SXin Li return RValue::get(Result);
178*67e74705SXin Li }
179*67e74705SXin Li
180*67e74705SXin Li /// @brief Utility to insert an atomic cmpxchg instruction.
181*67e74705SXin Li ///
182*67e74705SXin Li /// @param CGF The current codegen function.
183*67e74705SXin Li /// @param E Builtin call expression to convert to cmpxchg.
184*67e74705SXin Li /// arg0 - address to operate on
185*67e74705SXin Li /// arg1 - value to compare with
186*67e74705SXin Li /// arg2 - new value
187*67e74705SXin Li /// @param ReturnBool Specifies whether to return success flag of
188*67e74705SXin Li /// cmpxchg result or the old value.
189*67e74705SXin Li ///
190*67e74705SXin Li /// @returns result of cmpxchg, according to ReturnBool
MakeAtomicCmpXchgValue(CodeGenFunction & CGF,const CallExpr * E,bool ReturnBool)191*67e74705SXin Li static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
192*67e74705SXin Li bool ReturnBool) {
193*67e74705SXin Li QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
194*67e74705SXin Li llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
195*67e74705SXin Li unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
196*67e74705SXin Li
197*67e74705SXin Li llvm::IntegerType *IntType = llvm::IntegerType::get(
198*67e74705SXin Li CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
199*67e74705SXin Li llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
200*67e74705SXin Li
201*67e74705SXin Li Value *Args[3];
202*67e74705SXin Li Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
203*67e74705SXin Li Args[1] = CGF.EmitScalarExpr(E->getArg(1));
204*67e74705SXin Li llvm::Type *ValueType = Args[1]->getType();
205*67e74705SXin Li Args[1] = EmitToInt(CGF, Args[1], T, IntType);
206*67e74705SXin Li Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
207*67e74705SXin Li
208*67e74705SXin Li Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
209*67e74705SXin Li Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
210*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent);
211*67e74705SXin Li if (ReturnBool)
212*67e74705SXin Li // Extract boolean success flag and zext it to int.
213*67e74705SXin Li return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
214*67e74705SXin Li CGF.ConvertType(E->getType()));
215*67e74705SXin Li else
216*67e74705SXin Li // Extract old value and emit it using the same type as compare value.
217*67e74705SXin Li return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
218*67e74705SXin Li ValueType);
219*67e74705SXin Li }
220*67e74705SXin Li
221*67e74705SXin Li // Emit a simple mangled intrinsic that has 1 argument and a return type
222*67e74705SXin Li // matching the argument type.
emitUnaryBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)223*67e74705SXin Li static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
224*67e74705SXin Li const CallExpr *E,
225*67e74705SXin Li unsigned IntrinsicID) {
226*67e74705SXin Li llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
227*67e74705SXin Li
228*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
229*67e74705SXin Li return CGF.Builder.CreateCall(F, Src0);
230*67e74705SXin Li }
231*67e74705SXin Li
232*67e74705SXin Li // Emit an intrinsic that has 2 operands of the same type as its result.
emitBinaryBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)233*67e74705SXin Li static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
234*67e74705SXin Li const CallExpr *E,
235*67e74705SXin Li unsigned IntrinsicID) {
236*67e74705SXin Li llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
237*67e74705SXin Li llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
238*67e74705SXin Li
239*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240*67e74705SXin Li return CGF.Builder.CreateCall(F, { Src0, Src1 });
241*67e74705SXin Li }
242*67e74705SXin Li
243*67e74705SXin Li // Emit an intrinsic that has 3 operands of the same type as its result.
emitTernaryBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)244*67e74705SXin Li static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
245*67e74705SXin Li const CallExpr *E,
246*67e74705SXin Li unsigned IntrinsicID) {
247*67e74705SXin Li llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248*67e74705SXin Li llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249*67e74705SXin Li llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
250*67e74705SXin Li
251*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
252*67e74705SXin Li return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
253*67e74705SXin Li }
254*67e74705SXin Li
255*67e74705SXin Li // Emit an intrinsic that has 1 float or double operand, and 1 integer.
emitFPIntBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)256*67e74705SXin Li static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
257*67e74705SXin Li const CallExpr *E,
258*67e74705SXin Li unsigned IntrinsicID) {
259*67e74705SXin Li llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
260*67e74705SXin Li llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
261*67e74705SXin Li
262*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263*67e74705SXin Li return CGF.Builder.CreateCall(F, {Src0, Src1});
264*67e74705SXin Li }
265*67e74705SXin Li
266*67e74705SXin Li /// EmitFAbs - Emit a call to @llvm.fabs().
EmitFAbs(CodeGenFunction & CGF,Value * V)267*67e74705SXin Li static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
268*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
269*67e74705SXin Li llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
270*67e74705SXin Li Call->setDoesNotAccessMemory();
271*67e74705SXin Li return Call;
272*67e74705SXin Li }
273*67e74705SXin Li
274*67e74705SXin Li /// Emit the computation of the sign bit for a floating point value. Returns
275*67e74705SXin Li /// the i1 sign bit value.
EmitSignBit(CodeGenFunction & CGF,Value * V)276*67e74705SXin Li static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
277*67e74705SXin Li LLVMContext &C = CGF.CGM.getLLVMContext();
278*67e74705SXin Li
279*67e74705SXin Li llvm::Type *Ty = V->getType();
280*67e74705SXin Li int Width = Ty->getPrimitiveSizeInBits();
281*67e74705SXin Li llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
282*67e74705SXin Li V = CGF.Builder.CreateBitCast(V, IntTy);
283*67e74705SXin Li if (Ty->isPPC_FP128Ty()) {
284*67e74705SXin Li // We want the sign bit of the higher-order double. The bitcast we just
285*67e74705SXin Li // did works as if the double-double was stored to memory and then
286*67e74705SXin Li // read as an i128. The "store" will put the higher-order double in the
287*67e74705SXin Li // lower address in both little- and big-Endian modes, but the "load"
288*67e74705SXin Li // will treat those bits as a different part of the i128: the low bits in
289*67e74705SXin Li // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
290*67e74705SXin Li // we need to shift the high bits down to the low before truncating.
291*67e74705SXin Li Width >>= 1;
292*67e74705SXin Li if (CGF.getTarget().isBigEndian()) {
293*67e74705SXin Li Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
294*67e74705SXin Li V = CGF.Builder.CreateLShr(V, ShiftCst);
295*67e74705SXin Li }
296*67e74705SXin Li // We are truncating value in order to extract the higher-order
297*67e74705SXin Li // double, which we will be using to extract the sign from.
298*67e74705SXin Li IntTy = llvm::IntegerType::get(C, Width);
299*67e74705SXin Li V = CGF.Builder.CreateTrunc(V, IntTy);
300*67e74705SXin Li }
301*67e74705SXin Li Value *Zero = llvm::Constant::getNullValue(IntTy);
302*67e74705SXin Li return CGF.Builder.CreateICmpSLT(V, Zero);
303*67e74705SXin Li }
304*67e74705SXin Li
emitLibraryCall(CodeGenFunction & CGF,const FunctionDecl * Fn,const CallExpr * E,llvm::Value * calleeValue)305*67e74705SXin Li static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
306*67e74705SXin Li const CallExpr *E, llvm::Value *calleeValue) {
307*67e74705SXin Li return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
308*67e74705SXin Li ReturnValueSlot(), Fn);
309*67e74705SXin Li }
310*67e74705SXin Li
311*67e74705SXin Li /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
312*67e74705SXin Li /// depending on IntrinsicID.
313*67e74705SXin Li ///
314*67e74705SXin Li /// \arg CGF The current codegen function.
315*67e74705SXin Li /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
316*67e74705SXin Li /// \arg X The first argument to the llvm.*.with.overflow.*.
317*67e74705SXin Li /// \arg Y The second argument to the llvm.*.with.overflow.*.
318*67e74705SXin Li /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
319*67e74705SXin Li /// \returns The result (i.e. sum/product) returned by the intrinsic.
EmitOverflowIntrinsic(CodeGenFunction & CGF,const llvm::Intrinsic::ID IntrinsicID,llvm::Value * X,llvm::Value * Y,llvm::Value * & Carry)320*67e74705SXin Li static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
321*67e74705SXin Li const llvm::Intrinsic::ID IntrinsicID,
322*67e74705SXin Li llvm::Value *X, llvm::Value *Y,
323*67e74705SXin Li llvm::Value *&Carry) {
324*67e74705SXin Li // Make sure we have integers of the same width.
325*67e74705SXin Li assert(X->getType() == Y->getType() &&
326*67e74705SXin Li "Arguments must be the same type. (Did you forget to make sure both "
327*67e74705SXin Li "arguments have the same integer width?)");
328*67e74705SXin Li
329*67e74705SXin Li llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
330*67e74705SXin Li llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
331*67e74705SXin Li Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
332*67e74705SXin Li return CGF.Builder.CreateExtractValue(Tmp, 0);
333*67e74705SXin Li }
334*67e74705SXin Li
emitRangedBuiltin(CodeGenFunction & CGF,unsigned IntrinsicID,int low,int high)335*67e74705SXin Li static Value *emitRangedBuiltin(CodeGenFunction &CGF,
336*67e74705SXin Li unsigned IntrinsicID,
337*67e74705SXin Li int low, int high) {
338*67e74705SXin Li llvm::MDBuilder MDHelper(CGF.getLLVMContext());
339*67e74705SXin Li llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
340*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
341*67e74705SXin Li llvm::Instruction *Call = CGF.Builder.CreateCall(F);
342*67e74705SXin Li Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
343*67e74705SXin Li return Call;
344*67e74705SXin Li }
345*67e74705SXin Li
namespace {

/// Describes an integer type by its bit width and its signedness. Kept as a
/// plain aggregate because it is brace-initialized as {Width, Signed}.
struct WidthAndSignedness {
  unsigned Width; // Width of the integer type, in bits.
  bool Signed;    // True when the integer type is signed.
};

} // end anonymous namespace
352*67e74705SXin Li
353*67e74705SXin Li static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext & context,const clang::QualType Type)354*67e74705SXin Li getIntegerWidthAndSignedness(const clang::ASTContext &context,
355*67e74705SXin Li const clang::QualType Type) {
356*67e74705SXin Li assert(Type->isIntegerType() && "Given type is not an integer.");
357*67e74705SXin Li unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
358*67e74705SXin Li bool Signed = Type->isSignedIntegerType();
359*67e74705SXin Li return {Width, Signed};
360*67e74705SXin Li }
361*67e74705SXin Li
362*67e74705SXin Li // Given one or more integer types, this function produces an integer type that
363*67e74705SXin Li // encompasses them: any value in one of the given types could be expressed in
364*67e74705SXin Li // the encompassing type.
365*67e74705SXin Li static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types)366*67e74705SXin Li EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
367*67e74705SXin Li assert(Types.size() > 0 && "Empty list of types.");
368*67e74705SXin Li
369*67e74705SXin Li // If any of the given types is signed, we must return a signed type.
370*67e74705SXin Li bool Signed = false;
371*67e74705SXin Li for (const auto &Type : Types) {
372*67e74705SXin Li Signed |= Type.Signed;
373*67e74705SXin Li }
374*67e74705SXin Li
375*67e74705SXin Li // The encompassing type must have a width greater than or equal to the width
376*67e74705SXin Li // of the specified types. Aditionally, if the encompassing type is signed,
377*67e74705SXin Li // its width must be strictly greater than the width of any unsigned types
378*67e74705SXin Li // given.
379*67e74705SXin Li unsigned Width = 0;
380*67e74705SXin Li for (const auto &Type : Types) {
381*67e74705SXin Li unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
382*67e74705SXin Li if (Width < MinWidth) {
383*67e74705SXin Li Width = MinWidth;
384*67e74705SXin Li }
385*67e74705SXin Li }
386*67e74705SXin Li
387*67e74705SXin Li return {Width, Signed};
388*67e74705SXin Li }
389*67e74705SXin Li
EmitVAStartEnd(Value * ArgValue,bool IsStart)390*67e74705SXin Li Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
391*67e74705SXin Li llvm::Type *DestType = Int8PtrTy;
392*67e74705SXin Li if (ArgValue->getType() != DestType)
393*67e74705SXin Li ArgValue =
394*67e74705SXin Li Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
395*67e74705SXin Li
396*67e74705SXin Li Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
397*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
398*67e74705SXin Li }
399*67e74705SXin Li
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;

  // Besides identical types, only 0 -> 1 and 3 -> 2 are accepted.
  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
408*67e74705SXin Li
409*67e74705SXin Li static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type,llvm::IntegerType * ResType)410*67e74705SXin Li getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
411*67e74705SXin Li return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
412*67e74705SXin Li }
413*67e74705SXin Li
414*67e74705SXin Li llvm::Value *
evaluateOrEmitBuiltinObjectSize(const Expr * E,unsigned Type,llvm::IntegerType * ResType)415*67e74705SXin Li CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
416*67e74705SXin Li llvm::IntegerType *ResType) {
417*67e74705SXin Li uint64_t ObjectSize;
418*67e74705SXin Li if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
419*67e74705SXin Li return emitBuiltinObjectSize(E, Type, ResType);
420*67e74705SXin Li return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
421*67e74705SXin Li }
422*67e74705SXin Li
423*67e74705SXin Li /// Returns a Value corresponding to the size of the given expression.
424*67e74705SXin Li /// This Value may be either of the following:
425*67e74705SXin Li /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
426*67e74705SXin Li /// it)
427*67e74705SXin Li /// - A call to the @llvm.objectsize intrinsic
428*67e74705SXin Li llvm::Value *
emitBuiltinObjectSize(const Expr * E,unsigned Type,llvm::IntegerType * ResType)429*67e74705SXin Li CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
430*67e74705SXin Li llvm::IntegerType *ResType) {
431*67e74705SXin Li // We need to reference an argument if the pointer is a parameter with the
432*67e74705SXin Li // pass_object_size attribute.
433*67e74705SXin Li if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
434*67e74705SXin Li auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
435*67e74705SXin Li auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
436*67e74705SXin Li if (Param != nullptr && PS != nullptr &&
437*67e74705SXin Li areBOSTypesCompatible(PS->getType(), Type)) {
438*67e74705SXin Li auto Iter = SizeArguments.find(Param);
439*67e74705SXin Li assert(Iter != SizeArguments.end());
440*67e74705SXin Li
441*67e74705SXin Li const ImplicitParamDecl *D = Iter->second;
442*67e74705SXin Li auto DIter = LocalDeclMap.find(D);
443*67e74705SXin Li assert(DIter != LocalDeclMap.end());
444*67e74705SXin Li
445*67e74705SXin Li return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
446*67e74705SXin Li getContext().getSizeType(), E->getLocStart());
447*67e74705SXin Li }
448*67e74705SXin Li }
449*67e74705SXin Li
450*67e74705SXin Li // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
451*67e74705SXin Li // evaluate E for side-effects. In either case, we shouldn't lower to
452*67e74705SXin Li // @llvm.objectsize.
453*67e74705SXin Li if (Type == 3 || E->HasSideEffects(getContext()))
454*67e74705SXin Li return getDefaultBuiltinObjectSizeResult(Type, ResType);
455*67e74705SXin Li
456*67e74705SXin Li // LLVM only supports 0 and 2, make sure that we pass along that
457*67e74705SXin Li // as a boolean.
458*67e74705SXin Li auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
459*67e74705SXin Li // FIXME: Get right address space.
460*67e74705SXin Li llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
461*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
462*67e74705SXin Li return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
463*67e74705SXin Li }
464*67e74705SXin Li
EmitBuiltinExpr(const FunctionDecl * FD,unsigned BuiltinID,const CallExpr * E,ReturnValueSlot ReturnValue)465*67e74705SXin Li RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
466*67e74705SXin Li unsigned BuiltinID, const CallExpr *E,
467*67e74705SXin Li ReturnValueSlot ReturnValue) {
468*67e74705SXin Li // See if we can constant fold this builtin. If so, don't emit it at all.
469*67e74705SXin Li Expr::EvalResult Result;
470*67e74705SXin Li if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
471*67e74705SXin Li !Result.hasSideEffects()) {
472*67e74705SXin Li if (Result.Val.isInt())
473*67e74705SXin Li return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
474*67e74705SXin Li Result.Val.getInt()));
475*67e74705SXin Li if (Result.Val.isFloat())
476*67e74705SXin Li return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
477*67e74705SXin Li Result.Val.getFloat()));
478*67e74705SXin Li }
479*67e74705SXin Li
480*67e74705SXin Li switch (BuiltinID) {
481*67e74705SXin Li default: break; // Handle intrinsics and libm functions below.
482*67e74705SXin Li case Builtin::BI__builtin___CFStringMakeConstantString:
483*67e74705SXin Li case Builtin::BI__builtin___NSStringMakeConstantString:
484*67e74705SXin Li return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
485*67e74705SXin Li case Builtin::BI__builtin_stdarg_start:
486*67e74705SXin Li case Builtin::BI__builtin_va_start:
487*67e74705SXin Li case Builtin::BI__va_start:
488*67e74705SXin Li case Builtin::BI__builtin_va_end:
489*67e74705SXin Li return RValue::get(
490*67e74705SXin Li EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
491*67e74705SXin Li ? EmitScalarExpr(E->getArg(0))
492*67e74705SXin Li : EmitVAListRef(E->getArg(0)).getPointer(),
493*67e74705SXin Li BuiltinID != Builtin::BI__builtin_va_end));
494*67e74705SXin Li case Builtin::BI__builtin_va_copy: {
495*67e74705SXin Li Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
496*67e74705SXin Li Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
497*67e74705SXin Li
498*67e74705SXin Li llvm::Type *Type = Int8PtrTy;
499*67e74705SXin Li
500*67e74705SXin Li DstPtr = Builder.CreateBitCast(DstPtr, Type);
501*67e74705SXin Li SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
502*67e74705SXin Li return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
503*67e74705SXin Li {DstPtr, SrcPtr}));
504*67e74705SXin Li }
505*67e74705SXin Li case Builtin::BI__builtin_abs:
506*67e74705SXin Li case Builtin::BI__builtin_labs:
507*67e74705SXin Li case Builtin::BI__builtin_llabs: {
508*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
509*67e74705SXin Li
510*67e74705SXin Li Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
511*67e74705SXin Li Value *CmpResult =
512*67e74705SXin Li Builder.CreateICmpSGE(ArgValue,
513*67e74705SXin Li llvm::Constant::getNullValue(ArgValue->getType()),
514*67e74705SXin Li "abscond");
515*67e74705SXin Li Value *Result =
516*67e74705SXin Li Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
517*67e74705SXin Li
518*67e74705SXin Li return RValue::get(Result);
519*67e74705SXin Li }
520*67e74705SXin Li case Builtin::BI__builtin_fabs:
521*67e74705SXin Li case Builtin::BI__builtin_fabsf:
522*67e74705SXin Li case Builtin::BI__builtin_fabsl: {
523*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
524*67e74705SXin Li }
525*67e74705SXin Li case Builtin::BI__builtin_fmod:
526*67e74705SXin Li case Builtin::BI__builtin_fmodf:
527*67e74705SXin Li case Builtin::BI__builtin_fmodl: {
528*67e74705SXin Li Value *Arg1 = EmitScalarExpr(E->getArg(0));
529*67e74705SXin Li Value *Arg2 = EmitScalarExpr(E->getArg(1));
530*67e74705SXin Li Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
531*67e74705SXin Li return RValue::get(Result);
532*67e74705SXin Li }
533*67e74705SXin Li case Builtin::BI__builtin_copysign:
534*67e74705SXin Li case Builtin::BI__builtin_copysignf:
535*67e74705SXin Li case Builtin::BI__builtin_copysignl: {
536*67e74705SXin Li return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
537*67e74705SXin Li }
538*67e74705SXin Li case Builtin::BI__builtin_ceil:
539*67e74705SXin Li case Builtin::BI__builtin_ceilf:
540*67e74705SXin Li case Builtin::BI__builtin_ceill: {
541*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
542*67e74705SXin Li }
543*67e74705SXin Li case Builtin::BI__builtin_floor:
544*67e74705SXin Li case Builtin::BI__builtin_floorf:
545*67e74705SXin Li case Builtin::BI__builtin_floorl: {
546*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
547*67e74705SXin Li }
548*67e74705SXin Li case Builtin::BI__builtin_trunc:
549*67e74705SXin Li case Builtin::BI__builtin_truncf:
550*67e74705SXin Li case Builtin::BI__builtin_truncl: {
551*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
552*67e74705SXin Li }
553*67e74705SXin Li case Builtin::BI__builtin_rint:
554*67e74705SXin Li case Builtin::BI__builtin_rintf:
555*67e74705SXin Li case Builtin::BI__builtin_rintl: {
556*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
557*67e74705SXin Li }
558*67e74705SXin Li case Builtin::BI__builtin_nearbyint:
559*67e74705SXin Li case Builtin::BI__builtin_nearbyintf:
560*67e74705SXin Li case Builtin::BI__builtin_nearbyintl: {
561*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
562*67e74705SXin Li }
563*67e74705SXin Li case Builtin::BI__builtin_round:
564*67e74705SXin Li case Builtin::BI__builtin_roundf:
565*67e74705SXin Li case Builtin::BI__builtin_roundl: {
566*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
567*67e74705SXin Li }
568*67e74705SXin Li case Builtin::BI__builtin_fmin:
569*67e74705SXin Li case Builtin::BI__builtin_fminf:
570*67e74705SXin Li case Builtin::BI__builtin_fminl: {
571*67e74705SXin Li return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
572*67e74705SXin Li }
573*67e74705SXin Li case Builtin::BI__builtin_fmax:
574*67e74705SXin Li case Builtin::BI__builtin_fmaxf:
575*67e74705SXin Li case Builtin::BI__builtin_fmaxl: {
576*67e74705SXin Li return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
577*67e74705SXin Li }
578*67e74705SXin Li case Builtin::BI__builtin_conj:
579*67e74705SXin Li case Builtin::BI__builtin_conjf:
580*67e74705SXin Li case Builtin::BI__builtin_conjl: {
581*67e74705SXin Li ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
582*67e74705SXin Li Value *Real = ComplexVal.first;
583*67e74705SXin Li Value *Imag = ComplexVal.second;
584*67e74705SXin Li Value *Zero =
585*67e74705SXin Li Imag->getType()->isFPOrFPVectorTy()
586*67e74705SXin Li ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
587*67e74705SXin Li : llvm::Constant::getNullValue(Imag->getType());
588*67e74705SXin Li
589*67e74705SXin Li Imag = Builder.CreateFSub(Zero, Imag, "sub");
590*67e74705SXin Li return RValue::getComplex(std::make_pair(Real, Imag));
591*67e74705SXin Li }
592*67e74705SXin Li case Builtin::BI__builtin_creal:
593*67e74705SXin Li case Builtin::BI__builtin_crealf:
594*67e74705SXin Li case Builtin::BI__builtin_creall:
595*67e74705SXin Li case Builtin::BIcreal:
596*67e74705SXin Li case Builtin::BIcrealf:
597*67e74705SXin Li case Builtin::BIcreall: {
598*67e74705SXin Li ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
599*67e74705SXin Li return RValue::get(ComplexVal.first);
600*67e74705SXin Li }
601*67e74705SXin Li
602*67e74705SXin Li case Builtin::BI__builtin_cimag:
603*67e74705SXin Li case Builtin::BI__builtin_cimagf:
604*67e74705SXin Li case Builtin::BI__builtin_cimagl:
605*67e74705SXin Li case Builtin::BIcimag:
606*67e74705SXin Li case Builtin::BIcimagf:
607*67e74705SXin Li case Builtin::BIcimagl: {
608*67e74705SXin Li ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
609*67e74705SXin Li return RValue::get(ComplexVal.second);
610*67e74705SXin Li }
611*67e74705SXin Li
612*67e74705SXin Li case Builtin::BI__builtin_ctzs:
613*67e74705SXin Li case Builtin::BI__builtin_ctz:
614*67e74705SXin Li case Builtin::BI__builtin_ctzl:
615*67e74705SXin Li case Builtin::BI__builtin_ctzll: {
616*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
617*67e74705SXin Li
618*67e74705SXin Li llvm::Type *ArgType = ArgValue->getType();
619*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
620*67e74705SXin Li
621*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
622*67e74705SXin Li Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
623*67e74705SXin Li Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
624*67e74705SXin Li if (Result->getType() != ResultType)
625*67e74705SXin Li Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
626*67e74705SXin Li "cast");
627*67e74705SXin Li return RValue::get(Result);
628*67e74705SXin Li }
629*67e74705SXin Li case Builtin::BI__builtin_clzs:
630*67e74705SXin Li case Builtin::BI__builtin_clz:
631*67e74705SXin Li case Builtin::BI__builtin_clzl:
632*67e74705SXin Li case Builtin::BI__builtin_clzll: {
633*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
634*67e74705SXin Li
635*67e74705SXin Li llvm::Type *ArgType = ArgValue->getType();
636*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
637*67e74705SXin Li
638*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
639*67e74705SXin Li Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
640*67e74705SXin Li Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
641*67e74705SXin Li if (Result->getType() != ResultType)
642*67e74705SXin Li Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
643*67e74705SXin Li "cast");
644*67e74705SXin Li return RValue::get(Result);
645*67e74705SXin Li }
646*67e74705SXin Li case Builtin::BI__builtin_ffs:
647*67e74705SXin Li case Builtin::BI__builtin_ffsl:
648*67e74705SXin Li case Builtin::BI__builtin_ffsll: {
649*67e74705SXin Li // ffs(x) -> x ? cttz(x) + 1 : 0
650*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
651*67e74705SXin Li
652*67e74705SXin Li llvm::Type *ArgType = ArgValue->getType();
653*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
654*67e74705SXin Li
655*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
656*67e74705SXin Li Value *Tmp =
657*67e74705SXin Li Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
658*67e74705SXin Li llvm::ConstantInt::get(ArgType, 1));
659*67e74705SXin Li Value *Zero = llvm::Constant::getNullValue(ArgType);
660*67e74705SXin Li Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
661*67e74705SXin Li Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
662*67e74705SXin Li if (Result->getType() != ResultType)
663*67e74705SXin Li Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
664*67e74705SXin Li "cast");
665*67e74705SXin Li return RValue::get(Result);
666*67e74705SXin Li }
667*67e74705SXin Li case Builtin::BI__builtin_parity:
668*67e74705SXin Li case Builtin::BI__builtin_parityl:
669*67e74705SXin Li case Builtin::BI__builtin_parityll: {
670*67e74705SXin Li // parity(x) -> ctpop(x) & 1
671*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
672*67e74705SXin Li
673*67e74705SXin Li llvm::Type *ArgType = ArgValue->getType();
674*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
675*67e74705SXin Li
676*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
677*67e74705SXin Li Value *Tmp = Builder.CreateCall(F, ArgValue);
678*67e74705SXin Li Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
679*67e74705SXin Li if (Result->getType() != ResultType)
680*67e74705SXin Li Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
681*67e74705SXin Li "cast");
682*67e74705SXin Li return RValue::get(Result);
683*67e74705SXin Li }
684*67e74705SXin Li case Builtin::BI__builtin_popcount:
685*67e74705SXin Li case Builtin::BI__builtin_popcountl:
686*67e74705SXin Li case Builtin::BI__builtin_popcountll: {
687*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
688*67e74705SXin Li
689*67e74705SXin Li llvm::Type *ArgType = ArgValue->getType();
690*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
691*67e74705SXin Li
692*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
693*67e74705SXin Li Value *Result = Builder.CreateCall(F, ArgValue);
694*67e74705SXin Li if (Result->getType() != ResultType)
695*67e74705SXin Li Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
696*67e74705SXin Li "cast");
697*67e74705SXin Li return RValue::get(Result);
698*67e74705SXin Li }
699*67e74705SXin Li case Builtin::BI__builtin_unpredictable: {
700*67e74705SXin Li // Always return the argument of __builtin_unpredictable. LLVM does not
701*67e74705SXin Li // handle this builtin. Metadata for this builtin should be added directly
702*67e74705SXin Li // to instructions such as branches or switches that use it.
703*67e74705SXin Li return RValue::get(EmitScalarExpr(E->getArg(0)));
704*67e74705SXin Li }
705*67e74705SXin Li case Builtin::BI__builtin_expect: {
706*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
707*67e74705SXin Li llvm::Type *ArgType = ArgValue->getType();
708*67e74705SXin Li
709*67e74705SXin Li Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
710*67e74705SXin Li // Don't generate llvm.expect on -O0 as the backend won't use it for
711*67e74705SXin Li // anything.
712*67e74705SXin Li // Note, we still IRGen ExpectedValue because it could have side-effects.
713*67e74705SXin Li if (CGM.getCodeGenOpts().OptimizationLevel == 0)
714*67e74705SXin Li return RValue::get(ArgValue);
715*67e74705SXin Li
716*67e74705SXin Li Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
717*67e74705SXin Li Value *Result =
718*67e74705SXin Li Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
719*67e74705SXin Li return RValue::get(Result);
720*67e74705SXin Li }
721*67e74705SXin Li case Builtin::BI__builtin_assume_aligned: {
722*67e74705SXin Li Value *PtrValue = EmitScalarExpr(E->getArg(0));
723*67e74705SXin Li Value *OffsetValue =
724*67e74705SXin Li (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
725*67e74705SXin Li
726*67e74705SXin Li Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
727*67e74705SXin Li ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
728*67e74705SXin Li unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
729*67e74705SXin Li
730*67e74705SXin Li EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
731*67e74705SXin Li return RValue::get(PtrValue);
732*67e74705SXin Li }
733*67e74705SXin Li case Builtin::BI__assume:
734*67e74705SXin Li case Builtin::BI__builtin_assume: {
735*67e74705SXin Li if (E->getArg(0)->HasSideEffects(getContext()))
736*67e74705SXin Li return RValue::get(nullptr);
737*67e74705SXin Li
738*67e74705SXin Li Value *ArgValue = EmitScalarExpr(E->getArg(0));
739*67e74705SXin Li Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
740*67e74705SXin Li return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
741*67e74705SXin Li }
742*67e74705SXin Li case Builtin::BI__builtin_bswap16:
743*67e74705SXin Li case Builtin::BI__builtin_bswap32:
744*67e74705SXin Li case Builtin::BI__builtin_bswap64: {
745*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
746*67e74705SXin Li }
747*67e74705SXin Li case Builtin::BI__builtin_bitreverse8:
748*67e74705SXin Li case Builtin::BI__builtin_bitreverse16:
749*67e74705SXin Li case Builtin::BI__builtin_bitreverse32:
750*67e74705SXin Li case Builtin::BI__builtin_bitreverse64: {
751*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
752*67e74705SXin Li }
753*67e74705SXin Li case Builtin::BI__builtin_object_size: {
754*67e74705SXin Li unsigned Type =
755*67e74705SXin Li E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
756*67e74705SXin Li auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
757*67e74705SXin Li
758*67e74705SXin Li // We pass this builtin onto the optimizer so that it can figure out the
759*67e74705SXin Li // object size in more complex cases.
760*67e74705SXin Li return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
761*67e74705SXin Li }
762*67e74705SXin Li case Builtin::BI__builtin_prefetch: {
763*67e74705SXin Li Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
764*67e74705SXin Li // FIXME: Technically these constants should be of type 'int', yes?
765*67e74705SXin Li RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
766*67e74705SXin Li llvm::ConstantInt::get(Int32Ty, 0);
767*67e74705SXin Li Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
768*67e74705SXin Li llvm::ConstantInt::get(Int32Ty, 3);
769*67e74705SXin Li Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
770*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
771*67e74705SXin Li return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
772*67e74705SXin Li }
773*67e74705SXin Li case Builtin::BI__builtin_readcyclecounter: {
774*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
775*67e74705SXin Li return RValue::get(Builder.CreateCall(F));
776*67e74705SXin Li }
777*67e74705SXin Li case Builtin::BI__builtin___clear_cache: {
778*67e74705SXin Li Value *Begin = EmitScalarExpr(E->getArg(0));
779*67e74705SXin Li Value *End = EmitScalarExpr(E->getArg(1));
780*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
781*67e74705SXin Li return RValue::get(Builder.CreateCall(F, {Begin, End}));
782*67e74705SXin Li }
783*67e74705SXin Li case Builtin::BI__builtin_trap:
784*67e74705SXin Li return RValue::get(EmitTrapCall(Intrinsic::trap));
785*67e74705SXin Li case Builtin::BI__debugbreak:
786*67e74705SXin Li return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
787*67e74705SXin Li case Builtin::BI__builtin_unreachable: {
788*67e74705SXin Li if (SanOpts.has(SanitizerKind::Unreachable)) {
789*67e74705SXin Li SanitizerScope SanScope(this);
790*67e74705SXin Li EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
791*67e74705SXin Li SanitizerKind::Unreachable),
792*67e74705SXin Li "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
793*67e74705SXin Li None);
794*67e74705SXin Li } else
795*67e74705SXin Li Builder.CreateUnreachable();
796*67e74705SXin Li
797*67e74705SXin Li // We do need to preserve an insertion point.
798*67e74705SXin Li EmitBlock(createBasicBlock("unreachable.cont"));
799*67e74705SXin Li
800*67e74705SXin Li return RValue::get(nullptr);
801*67e74705SXin Li }
802*67e74705SXin Li
803*67e74705SXin Li case Builtin::BI__builtin_powi:
804*67e74705SXin Li case Builtin::BI__builtin_powif:
805*67e74705SXin Li case Builtin::BI__builtin_powil: {
806*67e74705SXin Li Value *Base = EmitScalarExpr(E->getArg(0));
807*67e74705SXin Li Value *Exponent = EmitScalarExpr(E->getArg(1));
808*67e74705SXin Li llvm::Type *ArgType = Base->getType();
809*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
810*67e74705SXin Li return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
811*67e74705SXin Li }
812*67e74705SXin Li
813*67e74705SXin Li case Builtin::BI__builtin_isgreater:
814*67e74705SXin Li case Builtin::BI__builtin_isgreaterequal:
815*67e74705SXin Li case Builtin::BI__builtin_isless:
816*67e74705SXin Li case Builtin::BI__builtin_islessequal:
817*67e74705SXin Li case Builtin::BI__builtin_islessgreater:
818*67e74705SXin Li case Builtin::BI__builtin_isunordered: {
819*67e74705SXin Li // Ordered comparisons: we know the arguments to these are matching scalar
820*67e74705SXin Li // floating point values.
821*67e74705SXin Li Value *LHS = EmitScalarExpr(E->getArg(0));
822*67e74705SXin Li Value *RHS = EmitScalarExpr(E->getArg(1));
823*67e74705SXin Li
824*67e74705SXin Li switch (BuiltinID) {
825*67e74705SXin Li default: llvm_unreachable("Unknown ordered comparison");
826*67e74705SXin Li case Builtin::BI__builtin_isgreater:
827*67e74705SXin Li LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
828*67e74705SXin Li break;
829*67e74705SXin Li case Builtin::BI__builtin_isgreaterequal:
830*67e74705SXin Li LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
831*67e74705SXin Li break;
832*67e74705SXin Li case Builtin::BI__builtin_isless:
833*67e74705SXin Li LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
834*67e74705SXin Li break;
835*67e74705SXin Li case Builtin::BI__builtin_islessequal:
836*67e74705SXin Li LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
837*67e74705SXin Li break;
838*67e74705SXin Li case Builtin::BI__builtin_islessgreater:
839*67e74705SXin Li LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
840*67e74705SXin Li break;
841*67e74705SXin Li case Builtin::BI__builtin_isunordered:
842*67e74705SXin Li LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
843*67e74705SXin Li break;
844*67e74705SXin Li }
845*67e74705SXin Li // ZExt bool to int type.
846*67e74705SXin Li return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
847*67e74705SXin Li }
848*67e74705SXin Li case Builtin::BI__builtin_isnan: {
849*67e74705SXin Li Value *V = EmitScalarExpr(E->getArg(0));
850*67e74705SXin Li V = Builder.CreateFCmpUNO(V, V, "cmp");
851*67e74705SXin Li return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
852*67e74705SXin Li }
853*67e74705SXin Li
854*67e74705SXin Li case Builtin::BI__builtin_isinf:
855*67e74705SXin Li case Builtin::BI__builtin_isfinite: {
856*67e74705SXin Li // isinf(x) --> fabs(x) == infinity
857*67e74705SXin Li // isfinite(x) --> fabs(x) != infinity
858*67e74705SXin Li // x != NaN via the ordered compare in either case.
859*67e74705SXin Li Value *V = EmitScalarExpr(E->getArg(0));
860*67e74705SXin Li Value *Fabs = EmitFAbs(*this, V);
861*67e74705SXin Li Constant *Infinity = ConstantFP::getInfinity(V->getType());
862*67e74705SXin Li CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
863*67e74705SXin Li ? CmpInst::FCMP_OEQ
864*67e74705SXin Li : CmpInst::FCMP_ONE;
865*67e74705SXin Li Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
866*67e74705SXin Li return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
867*67e74705SXin Li }
868*67e74705SXin Li
869*67e74705SXin Li case Builtin::BI__builtin_isinf_sign: {
870*67e74705SXin Li // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
871*67e74705SXin Li Value *Arg = EmitScalarExpr(E->getArg(0));
872*67e74705SXin Li Value *AbsArg = EmitFAbs(*this, Arg);
873*67e74705SXin Li Value *IsInf = Builder.CreateFCmpOEQ(
874*67e74705SXin Li AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
875*67e74705SXin Li Value *IsNeg = EmitSignBit(*this, Arg);
876*67e74705SXin Li
877*67e74705SXin Li llvm::Type *IntTy = ConvertType(E->getType());
878*67e74705SXin Li Value *Zero = Constant::getNullValue(IntTy);
879*67e74705SXin Li Value *One = ConstantInt::get(IntTy, 1);
880*67e74705SXin Li Value *NegativeOne = ConstantInt::get(IntTy, -1);
881*67e74705SXin Li Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
882*67e74705SXin Li Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
883*67e74705SXin Li return RValue::get(Result);
884*67e74705SXin Li }
885*67e74705SXin Li
886*67e74705SXin Li case Builtin::BI__builtin_isnormal: {
887*67e74705SXin Li // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
888*67e74705SXin Li Value *V = EmitScalarExpr(E->getArg(0));
889*67e74705SXin Li Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
890*67e74705SXin Li
891*67e74705SXin Li Value *Abs = EmitFAbs(*this, V);
892*67e74705SXin Li Value *IsLessThanInf =
893*67e74705SXin Li Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
894*67e74705SXin Li APFloat Smallest = APFloat::getSmallestNormalized(
895*67e74705SXin Li getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
896*67e74705SXin Li Value *IsNormal =
897*67e74705SXin Li Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
898*67e74705SXin Li "isnormal");
899*67e74705SXin Li V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
900*67e74705SXin Li V = Builder.CreateAnd(V, IsNormal, "and");
901*67e74705SXin Li return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
902*67e74705SXin Li }
903*67e74705SXin Li
904*67e74705SXin Li case Builtin::BI__builtin_fpclassify: {
905*67e74705SXin Li Value *V = EmitScalarExpr(E->getArg(5));
906*67e74705SXin Li llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
907*67e74705SXin Li
908*67e74705SXin Li // Create Result
909*67e74705SXin Li BasicBlock *Begin = Builder.GetInsertBlock();
910*67e74705SXin Li BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
911*67e74705SXin Li Builder.SetInsertPoint(End);
912*67e74705SXin Li PHINode *Result =
913*67e74705SXin Li Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
914*67e74705SXin Li "fpclassify_result");
915*67e74705SXin Li
916*67e74705SXin Li // if (V==0) return FP_ZERO
917*67e74705SXin Li Builder.SetInsertPoint(Begin);
918*67e74705SXin Li Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
919*67e74705SXin Li "iszero");
920*67e74705SXin Li Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
921*67e74705SXin Li BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
922*67e74705SXin Li Builder.CreateCondBr(IsZero, End, NotZero);
923*67e74705SXin Li Result->addIncoming(ZeroLiteral, Begin);
924*67e74705SXin Li
925*67e74705SXin Li // if (V != V) return FP_NAN
926*67e74705SXin Li Builder.SetInsertPoint(NotZero);
927*67e74705SXin Li Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
928*67e74705SXin Li Value *NanLiteral = EmitScalarExpr(E->getArg(0));
929*67e74705SXin Li BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
930*67e74705SXin Li Builder.CreateCondBr(IsNan, End, NotNan);
931*67e74705SXin Li Result->addIncoming(NanLiteral, NotZero);
932*67e74705SXin Li
933*67e74705SXin Li // if (fabs(V) == infinity) return FP_INFINITY
934*67e74705SXin Li Builder.SetInsertPoint(NotNan);
935*67e74705SXin Li Value *VAbs = EmitFAbs(*this, V);
936*67e74705SXin Li Value *IsInf =
937*67e74705SXin Li Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
938*67e74705SXin Li "isinf");
939*67e74705SXin Li Value *InfLiteral = EmitScalarExpr(E->getArg(1));
940*67e74705SXin Li BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
941*67e74705SXin Li Builder.CreateCondBr(IsInf, End, NotInf);
942*67e74705SXin Li Result->addIncoming(InfLiteral, NotNan);
943*67e74705SXin Li
944*67e74705SXin Li // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
945*67e74705SXin Li Builder.SetInsertPoint(NotInf);
946*67e74705SXin Li APFloat Smallest = APFloat::getSmallestNormalized(
947*67e74705SXin Li getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
948*67e74705SXin Li Value *IsNormal =
949*67e74705SXin Li Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
950*67e74705SXin Li "isnormal");
951*67e74705SXin Li Value *NormalResult =
952*67e74705SXin Li Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
953*67e74705SXin Li EmitScalarExpr(E->getArg(3)));
954*67e74705SXin Li Builder.CreateBr(End);
955*67e74705SXin Li Result->addIncoming(NormalResult, NotInf);
956*67e74705SXin Li
957*67e74705SXin Li // return Result
958*67e74705SXin Li Builder.SetInsertPoint(End);
959*67e74705SXin Li return RValue::get(Result);
960*67e74705SXin Li }
961*67e74705SXin Li
962*67e74705SXin Li case Builtin::BIalloca:
963*67e74705SXin Li case Builtin::BI_alloca:
964*67e74705SXin Li case Builtin::BI__builtin_alloca: {
965*67e74705SXin Li Value *Size = EmitScalarExpr(E->getArg(0));
966*67e74705SXin Li return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
967*67e74705SXin Li }
968*67e74705SXin Li case Builtin::BIbzero:
969*67e74705SXin Li case Builtin::BI__builtin_bzero: {
970*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
971*67e74705SXin Li Value *SizeVal = EmitScalarExpr(E->getArg(1));
972*67e74705SXin Li EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
973*67e74705SXin Li E->getArg(0)->getExprLoc(), FD, 0);
974*67e74705SXin Li Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
975*67e74705SXin Li return RValue::get(Dest.getPointer());
976*67e74705SXin Li }
977*67e74705SXin Li case Builtin::BImemcpy:
978*67e74705SXin Li case Builtin::BI__builtin_memcpy: {
979*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
980*67e74705SXin Li Address Src = EmitPointerWithAlignment(E->getArg(1));
981*67e74705SXin Li Value *SizeVal = EmitScalarExpr(E->getArg(2));
982*67e74705SXin Li EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
983*67e74705SXin Li E->getArg(0)->getExprLoc(), FD, 0);
984*67e74705SXin Li EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
985*67e74705SXin Li E->getArg(1)->getExprLoc(), FD, 1);
986*67e74705SXin Li Builder.CreateMemCpy(Dest, Src, SizeVal, false);
987*67e74705SXin Li return RValue::get(Dest.getPointer());
988*67e74705SXin Li }
989*67e74705SXin Li
990*67e74705SXin Li case Builtin::BI__builtin___memcpy_chk: {
991*67e74705SXin Li // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
992*67e74705SXin Li llvm::APSInt Size, DstSize;
993*67e74705SXin Li if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
994*67e74705SXin Li !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
995*67e74705SXin Li break;
996*67e74705SXin Li if (Size.ugt(DstSize))
997*67e74705SXin Li break;
998*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
999*67e74705SXin Li Address Src = EmitPointerWithAlignment(E->getArg(1));
1000*67e74705SXin Li Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1001*67e74705SXin Li Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1002*67e74705SXin Li return RValue::get(Dest.getPointer());
1003*67e74705SXin Li }
1004*67e74705SXin Li
1005*67e74705SXin Li case Builtin::BI__builtin_objc_memmove_collectable: {
1006*67e74705SXin Li Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1007*67e74705SXin Li Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1008*67e74705SXin Li Value *SizeVal = EmitScalarExpr(E->getArg(2));
1009*67e74705SXin Li CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1010*67e74705SXin Li DestAddr, SrcAddr, SizeVal);
1011*67e74705SXin Li return RValue::get(DestAddr.getPointer());
1012*67e74705SXin Li }
1013*67e74705SXin Li
1014*67e74705SXin Li case Builtin::BI__builtin___memmove_chk: {
1015*67e74705SXin Li // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1016*67e74705SXin Li llvm::APSInt Size, DstSize;
1017*67e74705SXin Li if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1018*67e74705SXin Li !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1019*67e74705SXin Li break;
1020*67e74705SXin Li if (Size.ugt(DstSize))
1021*67e74705SXin Li break;
1022*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
1023*67e74705SXin Li Address Src = EmitPointerWithAlignment(E->getArg(1));
1024*67e74705SXin Li Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1025*67e74705SXin Li Builder.CreateMemMove(Dest, Src, SizeVal, false);
1026*67e74705SXin Li return RValue::get(Dest.getPointer());
1027*67e74705SXin Li }
1028*67e74705SXin Li
1029*67e74705SXin Li case Builtin::BImemmove:
1030*67e74705SXin Li case Builtin::BI__builtin_memmove: {
1031*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
1032*67e74705SXin Li Address Src = EmitPointerWithAlignment(E->getArg(1));
1033*67e74705SXin Li Value *SizeVal = EmitScalarExpr(E->getArg(2));
1034*67e74705SXin Li EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1035*67e74705SXin Li E->getArg(0)->getExprLoc(), FD, 0);
1036*67e74705SXin Li EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1037*67e74705SXin Li E->getArg(1)->getExprLoc(), FD, 1);
1038*67e74705SXin Li Builder.CreateMemMove(Dest, Src, SizeVal, false);
1039*67e74705SXin Li return RValue::get(Dest.getPointer());
1040*67e74705SXin Li }
1041*67e74705SXin Li case Builtin::BImemset:
1042*67e74705SXin Li case Builtin::BI__builtin_memset: {
1043*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
1044*67e74705SXin Li Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1045*67e74705SXin Li Builder.getInt8Ty());
1046*67e74705SXin Li Value *SizeVal = EmitScalarExpr(E->getArg(2));
1047*67e74705SXin Li EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1048*67e74705SXin Li E->getArg(0)->getExprLoc(), FD, 0);
1049*67e74705SXin Li Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1050*67e74705SXin Li return RValue::get(Dest.getPointer());
1051*67e74705SXin Li }
1052*67e74705SXin Li case Builtin::BI__builtin___memset_chk: {
1053*67e74705SXin Li // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1054*67e74705SXin Li llvm::APSInt Size, DstSize;
1055*67e74705SXin Li if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1056*67e74705SXin Li !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1057*67e74705SXin Li break;
1058*67e74705SXin Li if (Size.ugt(DstSize))
1059*67e74705SXin Li break;
1060*67e74705SXin Li Address Dest = EmitPointerWithAlignment(E->getArg(0));
1061*67e74705SXin Li Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1062*67e74705SXin Li Builder.getInt8Ty());
1063*67e74705SXin Li Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1064*67e74705SXin Li Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1065*67e74705SXin Li return RValue::get(Dest.getPointer());
1066*67e74705SXin Li }
1067*67e74705SXin Li case Builtin::BI__builtin_dwarf_cfa: {
1068*67e74705SXin Li // The offset in bytes from the first argument to the CFA.
1069*67e74705SXin Li //
1070*67e74705SXin Li // Why on earth is this in the frontend? Is there any reason at
1071*67e74705SXin Li // all that the backend can't reasonably determine this while
1072*67e74705SXin Li // lowering llvm.eh.dwarf.cfa()?
1073*67e74705SXin Li //
1074*67e74705SXin Li // TODO: If there's a satisfactory reason, add a target hook for
1075*67e74705SXin Li // this instead of hard-coding 0, which is correct for most targets.
1076*67e74705SXin Li int32_t Offset = 0;
1077*67e74705SXin Li
1078*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1079*67e74705SXin Li return RValue::get(Builder.CreateCall(F,
1080*67e74705SXin Li llvm::ConstantInt::get(Int32Ty, Offset)));
1081*67e74705SXin Li }
1082*67e74705SXin Li case Builtin::BI__builtin_return_address: {
1083*67e74705SXin Li Value *Depth =
1084*67e74705SXin Li CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1085*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1086*67e74705SXin Li return RValue::get(Builder.CreateCall(F, Depth));
1087*67e74705SXin Li }
1088*67e74705SXin Li case Builtin::BI__builtin_frame_address: {
1089*67e74705SXin Li Value *Depth =
1090*67e74705SXin Li CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1091*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1092*67e74705SXin Li return RValue::get(Builder.CreateCall(F, Depth));
1093*67e74705SXin Li }
1094*67e74705SXin Li case Builtin::BI__builtin_extract_return_addr: {
1095*67e74705SXin Li Value *Address = EmitScalarExpr(E->getArg(0));
1096*67e74705SXin Li Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1097*67e74705SXin Li return RValue::get(Result);
1098*67e74705SXin Li }
1099*67e74705SXin Li case Builtin::BI__builtin_frob_return_addr: {
1100*67e74705SXin Li Value *Address = EmitScalarExpr(E->getArg(0));
1101*67e74705SXin Li Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1102*67e74705SXin Li return RValue::get(Result);
1103*67e74705SXin Li }
1104*67e74705SXin Li case Builtin::BI__builtin_dwarf_sp_column: {
1105*67e74705SXin Li llvm::IntegerType *Ty
1106*67e74705SXin Li = cast<llvm::IntegerType>(ConvertType(E->getType()));
1107*67e74705SXin Li int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1108*67e74705SXin Li if (Column == -1) {
1109*67e74705SXin Li CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1110*67e74705SXin Li return RValue::get(llvm::UndefValue::get(Ty));
1111*67e74705SXin Li }
1112*67e74705SXin Li return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1113*67e74705SXin Li }
1114*67e74705SXin Li case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1115*67e74705SXin Li Value *Address = EmitScalarExpr(E->getArg(0));
1116*67e74705SXin Li if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1117*67e74705SXin Li CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1118*67e74705SXin Li return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1119*67e74705SXin Li }
1120*67e74705SXin Li case Builtin::BI__builtin_eh_return: {
1121*67e74705SXin Li Value *Int = EmitScalarExpr(E->getArg(0));
1122*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(1));
1123*67e74705SXin Li
1124*67e74705SXin Li llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1125*67e74705SXin Li assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1126*67e74705SXin Li "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1127*67e74705SXin Li Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1128*67e74705SXin Li ? Intrinsic::eh_return_i32
1129*67e74705SXin Li : Intrinsic::eh_return_i64);
1130*67e74705SXin Li Builder.CreateCall(F, {Int, Ptr});
1131*67e74705SXin Li Builder.CreateUnreachable();
1132*67e74705SXin Li
1133*67e74705SXin Li // We do need to preserve an insertion point.
1134*67e74705SXin Li EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1135*67e74705SXin Li
1136*67e74705SXin Li return RValue::get(nullptr);
1137*67e74705SXin Li }
1138*67e74705SXin Li case Builtin::BI__builtin_unwind_init: {
1139*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1140*67e74705SXin Li return RValue::get(Builder.CreateCall(F));
1141*67e74705SXin Li }
1142*67e74705SXin Li case Builtin::BI__builtin_extend_pointer: {
1143*67e74705SXin Li // Extends a pointer to the size of an _Unwind_Word, which is
1144*67e74705SXin Li // uint64_t on all platforms. Generally this gets poked into a
1145*67e74705SXin Li // register and eventually used as an address, so if the
1146*67e74705SXin Li // addressing registers are wider than pointers and the platform
1147*67e74705SXin Li // doesn't implicitly ignore high-order bits when doing
1148*67e74705SXin Li // addressing, we need to make sure we zext / sext based on
1149*67e74705SXin Li // the platform's expectations.
1150*67e74705SXin Li //
1151*67e74705SXin Li // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1152*67e74705SXin Li
1153*67e74705SXin Li // Cast the pointer to intptr_t.
1154*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
1155*67e74705SXin Li Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1156*67e74705SXin Li
1157*67e74705SXin Li // If that's 64 bits, we're done.
1158*67e74705SXin Li if (IntPtrTy->getBitWidth() == 64)
1159*67e74705SXin Li return RValue::get(Result);
1160*67e74705SXin Li
1161*67e74705SXin Li // Otherwise, ask the codegen data what to do.
1162*67e74705SXin Li if (getTargetHooks().extendPointerWithSExt())
1163*67e74705SXin Li return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1164*67e74705SXin Li else
1165*67e74705SXin Li return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1166*67e74705SXin Li }
1167*67e74705SXin Li case Builtin::BI__builtin_setjmp: {
1168*67e74705SXin Li // Buffer is a void**.
1169*67e74705SXin Li Address Buf = EmitPointerWithAlignment(E->getArg(0));
1170*67e74705SXin Li
1171*67e74705SXin Li // Store the frame pointer to the setjmp buffer.
1172*67e74705SXin Li Value *FrameAddr =
1173*67e74705SXin Li Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1174*67e74705SXin Li ConstantInt::get(Int32Ty, 0));
1175*67e74705SXin Li Builder.CreateStore(FrameAddr, Buf);
1176*67e74705SXin Li
1177*67e74705SXin Li // Store the stack pointer to the setjmp buffer.
1178*67e74705SXin Li Value *StackAddr =
1179*67e74705SXin Li Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1180*67e74705SXin Li Address StackSaveSlot =
1181*67e74705SXin Li Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1182*67e74705SXin Li Builder.CreateStore(StackAddr, StackSaveSlot);
1183*67e74705SXin Li
1184*67e74705SXin Li // Call LLVM's EH setjmp, which is lightweight.
1185*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1186*67e74705SXin Li Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1187*67e74705SXin Li return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1188*67e74705SXin Li }
1189*67e74705SXin Li case Builtin::BI__builtin_longjmp: {
1190*67e74705SXin Li Value *Buf = EmitScalarExpr(E->getArg(0));
1191*67e74705SXin Li Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1192*67e74705SXin Li
1193*67e74705SXin Li // Call LLVM's EH longjmp, which is lightweight.
1194*67e74705SXin Li Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1195*67e74705SXin Li
1196*67e74705SXin Li // longjmp doesn't return; mark this as unreachable.
1197*67e74705SXin Li Builder.CreateUnreachable();
1198*67e74705SXin Li
1199*67e74705SXin Li // We do need to preserve an insertion point.
1200*67e74705SXin Li EmitBlock(createBasicBlock("longjmp.cont"));
1201*67e74705SXin Li
1202*67e74705SXin Li return RValue::get(nullptr);
1203*67e74705SXin Li }
1204*67e74705SXin Li case Builtin::BI__sync_fetch_and_add:
1205*67e74705SXin Li case Builtin::BI__sync_fetch_and_sub:
1206*67e74705SXin Li case Builtin::BI__sync_fetch_and_or:
1207*67e74705SXin Li case Builtin::BI__sync_fetch_and_and:
1208*67e74705SXin Li case Builtin::BI__sync_fetch_and_xor:
1209*67e74705SXin Li case Builtin::BI__sync_fetch_and_nand:
1210*67e74705SXin Li case Builtin::BI__sync_add_and_fetch:
1211*67e74705SXin Li case Builtin::BI__sync_sub_and_fetch:
1212*67e74705SXin Li case Builtin::BI__sync_and_and_fetch:
1213*67e74705SXin Li case Builtin::BI__sync_or_and_fetch:
1214*67e74705SXin Li case Builtin::BI__sync_xor_and_fetch:
1215*67e74705SXin Li case Builtin::BI__sync_nand_and_fetch:
1216*67e74705SXin Li case Builtin::BI__sync_val_compare_and_swap:
1217*67e74705SXin Li case Builtin::BI__sync_bool_compare_and_swap:
1218*67e74705SXin Li case Builtin::BI__sync_lock_test_and_set:
1219*67e74705SXin Li case Builtin::BI__sync_lock_release:
1220*67e74705SXin Li case Builtin::BI__sync_swap:
1221*67e74705SXin Li llvm_unreachable("Shouldn't make it through sema");
1222*67e74705SXin Li case Builtin::BI__sync_fetch_and_add_1:
1223*67e74705SXin Li case Builtin::BI__sync_fetch_and_add_2:
1224*67e74705SXin Li case Builtin::BI__sync_fetch_and_add_4:
1225*67e74705SXin Li case Builtin::BI__sync_fetch_and_add_8:
1226*67e74705SXin Li case Builtin::BI__sync_fetch_and_add_16:
1227*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1228*67e74705SXin Li case Builtin::BI__sync_fetch_and_sub_1:
1229*67e74705SXin Li case Builtin::BI__sync_fetch_and_sub_2:
1230*67e74705SXin Li case Builtin::BI__sync_fetch_and_sub_4:
1231*67e74705SXin Li case Builtin::BI__sync_fetch_and_sub_8:
1232*67e74705SXin Li case Builtin::BI__sync_fetch_and_sub_16:
1233*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1234*67e74705SXin Li case Builtin::BI__sync_fetch_and_or_1:
1235*67e74705SXin Li case Builtin::BI__sync_fetch_and_or_2:
1236*67e74705SXin Li case Builtin::BI__sync_fetch_and_or_4:
1237*67e74705SXin Li case Builtin::BI__sync_fetch_and_or_8:
1238*67e74705SXin Li case Builtin::BI__sync_fetch_and_or_16:
1239*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1240*67e74705SXin Li case Builtin::BI__sync_fetch_and_and_1:
1241*67e74705SXin Li case Builtin::BI__sync_fetch_and_and_2:
1242*67e74705SXin Li case Builtin::BI__sync_fetch_and_and_4:
1243*67e74705SXin Li case Builtin::BI__sync_fetch_and_and_8:
1244*67e74705SXin Li case Builtin::BI__sync_fetch_and_and_16:
1245*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1246*67e74705SXin Li case Builtin::BI__sync_fetch_and_xor_1:
1247*67e74705SXin Li case Builtin::BI__sync_fetch_and_xor_2:
1248*67e74705SXin Li case Builtin::BI__sync_fetch_and_xor_4:
1249*67e74705SXin Li case Builtin::BI__sync_fetch_and_xor_8:
1250*67e74705SXin Li case Builtin::BI__sync_fetch_and_xor_16:
1251*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1252*67e74705SXin Li case Builtin::BI__sync_fetch_and_nand_1:
1253*67e74705SXin Li case Builtin::BI__sync_fetch_and_nand_2:
1254*67e74705SXin Li case Builtin::BI__sync_fetch_and_nand_4:
1255*67e74705SXin Li case Builtin::BI__sync_fetch_and_nand_8:
1256*67e74705SXin Li case Builtin::BI__sync_fetch_and_nand_16:
1257*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1258*67e74705SXin Li
1259*67e74705SXin Li // Clang extensions: not overloaded yet.
1260*67e74705SXin Li case Builtin::BI__sync_fetch_and_min:
1261*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1262*67e74705SXin Li case Builtin::BI__sync_fetch_and_max:
1263*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1264*67e74705SXin Li case Builtin::BI__sync_fetch_and_umin:
1265*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1266*67e74705SXin Li case Builtin::BI__sync_fetch_and_umax:
1267*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1268*67e74705SXin Li
1269*67e74705SXin Li case Builtin::BI__sync_add_and_fetch_1:
1270*67e74705SXin Li case Builtin::BI__sync_add_and_fetch_2:
1271*67e74705SXin Li case Builtin::BI__sync_add_and_fetch_4:
1272*67e74705SXin Li case Builtin::BI__sync_add_and_fetch_8:
1273*67e74705SXin Li case Builtin::BI__sync_add_and_fetch_16:
1274*67e74705SXin Li return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1275*67e74705SXin Li llvm::Instruction::Add);
1276*67e74705SXin Li case Builtin::BI__sync_sub_and_fetch_1:
1277*67e74705SXin Li case Builtin::BI__sync_sub_and_fetch_2:
1278*67e74705SXin Li case Builtin::BI__sync_sub_and_fetch_4:
1279*67e74705SXin Li case Builtin::BI__sync_sub_and_fetch_8:
1280*67e74705SXin Li case Builtin::BI__sync_sub_and_fetch_16:
1281*67e74705SXin Li return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1282*67e74705SXin Li llvm::Instruction::Sub);
1283*67e74705SXin Li case Builtin::BI__sync_and_and_fetch_1:
1284*67e74705SXin Li case Builtin::BI__sync_and_and_fetch_2:
1285*67e74705SXin Li case Builtin::BI__sync_and_and_fetch_4:
1286*67e74705SXin Li case Builtin::BI__sync_and_and_fetch_8:
1287*67e74705SXin Li case Builtin::BI__sync_and_and_fetch_16:
1288*67e74705SXin Li return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1289*67e74705SXin Li llvm::Instruction::And);
1290*67e74705SXin Li case Builtin::BI__sync_or_and_fetch_1:
1291*67e74705SXin Li case Builtin::BI__sync_or_and_fetch_2:
1292*67e74705SXin Li case Builtin::BI__sync_or_and_fetch_4:
1293*67e74705SXin Li case Builtin::BI__sync_or_and_fetch_8:
1294*67e74705SXin Li case Builtin::BI__sync_or_and_fetch_16:
1295*67e74705SXin Li return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1296*67e74705SXin Li llvm::Instruction::Or);
1297*67e74705SXin Li case Builtin::BI__sync_xor_and_fetch_1:
1298*67e74705SXin Li case Builtin::BI__sync_xor_and_fetch_2:
1299*67e74705SXin Li case Builtin::BI__sync_xor_and_fetch_4:
1300*67e74705SXin Li case Builtin::BI__sync_xor_and_fetch_8:
1301*67e74705SXin Li case Builtin::BI__sync_xor_and_fetch_16:
1302*67e74705SXin Li return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1303*67e74705SXin Li llvm::Instruction::Xor);
1304*67e74705SXin Li case Builtin::BI__sync_nand_and_fetch_1:
1305*67e74705SXin Li case Builtin::BI__sync_nand_and_fetch_2:
1306*67e74705SXin Li case Builtin::BI__sync_nand_and_fetch_4:
1307*67e74705SXin Li case Builtin::BI__sync_nand_and_fetch_8:
1308*67e74705SXin Li case Builtin::BI__sync_nand_and_fetch_16:
1309*67e74705SXin Li return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1310*67e74705SXin Li llvm::Instruction::And, true);
1311*67e74705SXin Li
1312*67e74705SXin Li case Builtin::BI__sync_val_compare_and_swap_1:
1313*67e74705SXin Li case Builtin::BI__sync_val_compare_and_swap_2:
1314*67e74705SXin Li case Builtin::BI__sync_val_compare_and_swap_4:
1315*67e74705SXin Li case Builtin::BI__sync_val_compare_and_swap_8:
1316*67e74705SXin Li case Builtin::BI__sync_val_compare_and_swap_16:
1317*67e74705SXin Li return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1318*67e74705SXin Li
1319*67e74705SXin Li case Builtin::BI__sync_bool_compare_and_swap_1:
1320*67e74705SXin Li case Builtin::BI__sync_bool_compare_and_swap_2:
1321*67e74705SXin Li case Builtin::BI__sync_bool_compare_and_swap_4:
1322*67e74705SXin Li case Builtin::BI__sync_bool_compare_and_swap_8:
1323*67e74705SXin Li case Builtin::BI__sync_bool_compare_and_swap_16:
1324*67e74705SXin Li return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1325*67e74705SXin Li
1326*67e74705SXin Li case Builtin::BI__sync_swap_1:
1327*67e74705SXin Li case Builtin::BI__sync_swap_2:
1328*67e74705SXin Li case Builtin::BI__sync_swap_4:
1329*67e74705SXin Li case Builtin::BI__sync_swap_8:
1330*67e74705SXin Li case Builtin::BI__sync_swap_16:
1331*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1332*67e74705SXin Li
1333*67e74705SXin Li case Builtin::BI__sync_lock_test_and_set_1:
1334*67e74705SXin Li case Builtin::BI__sync_lock_test_and_set_2:
1335*67e74705SXin Li case Builtin::BI__sync_lock_test_and_set_4:
1336*67e74705SXin Li case Builtin::BI__sync_lock_test_and_set_8:
1337*67e74705SXin Li case Builtin::BI__sync_lock_test_and_set_16:
1338*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1339*67e74705SXin Li
1340*67e74705SXin Li case Builtin::BI__sync_lock_release_1:
1341*67e74705SXin Li case Builtin::BI__sync_lock_release_2:
1342*67e74705SXin Li case Builtin::BI__sync_lock_release_4:
1343*67e74705SXin Li case Builtin::BI__sync_lock_release_8:
1344*67e74705SXin Li case Builtin::BI__sync_lock_release_16: {
1345*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
1346*67e74705SXin Li QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1347*67e74705SXin Li CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1348*67e74705SXin Li llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1349*67e74705SXin Li StoreSize.getQuantity() * 8);
1350*67e74705SXin Li Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1351*67e74705SXin Li llvm::StoreInst *Store =
1352*67e74705SXin Li Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1353*67e74705SXin Li StoreSize);
1354*67e74705SXin Li Store->setAtomic(llvm::AtomicOrdering::Release);
1355*67e74705SXin Li return RValue::get(nullptr);
1356*67e74705SXin Li }
1357*67e74705SXin Li
1358*67e74705SXin Li case Builtin::BI__sync_synchronize: {
1359*67e74705SXin Li // We assume this is supposed to correspond to a C++0x-style
1360*67e74705SXin Li // sequentially-consistent fence (i.e. this is only usable for
1361*67e74705SXin Li // synchronization, not device I/O or anything like that). This intrinsic
1362*67e74705SXin Li // is really badly designed in the sense that in theory, there isn't
1363*67e74705SXin Li // any way to safely use it... but in practice, it mostly works
1364*67e74705SXin Li // to use it with non-atomic loads and stores to get acquire/release
1365*67e74705SXin Li // semantics.
1366*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1367*67e74705SXin Li return RValue::get(nullptr);
1368*67e74705SXin Li }
1369*67e74705SXin Li
1370*67e74705SXin Li case Builtin::BI__builtin_nontemporal_load:
1371*67e74705SXin Li return RValue::get(EmitNontemporalLoad(*this, E));
1372*67e74705SXin Li case Builtin::BI__builtin_nontemporal_store:
1373*67e74705SXin Li return RValue::get(EmitNontemporalStore(*this, E));
1374*67e74705SXin Li case Builtin::BI__c11_atomic_is_lock_free:
1375*67e74705SXin Li case Builtin::BI__atomic_is_lock_free: {
1376*67e74705SXin Li // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1377*67e74705SXin Li // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1378*67e74705SXin Li // _Atomic(T) is always properly-aligned.
1379*67e74705SXin Li const char *LibCallName = "__atomic_is_lock_free";
1380*67e74705SXin Li CallArgList Args;
1381*67e74705SXin Li Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1382*67e74705SXin Li getContext().getSizeType());
1383*67e74705SXin Li if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1384*67e74705SXin Li Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1385*67e74705SXin Li getContext().VoidPtrTy);
1386*67e74705SXin Li else
1387*67e74705SXin Li Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1388*67e74705SXin Li getContext().VoidPtrTy);
1389*67e74705SXin Li const CGFunctionInfo &FuncInfo =
1390*67e74705SXin Li CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1391*67e74705SXin Li llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1392*67e74705SXin Li llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1393*67e74705SXin Li return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1394*67e74705SXin Li }
1395*67e74705SXin Li
1396*67e74705SXin Li case Builtin::BI__atomic_test_and_set: {
1397*67e74705SXin Li // Look at the argument type to determine whether this is a volatile
1398*67e74705SXin Li // operation. The parameter type is always volatile.
1399*67e74705SXin Li QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1400*67e74705SXin Li bool Volatile =
1401*67e74705SXin Li PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1402*67e74705SXin Li
1403*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
1404*67e74705SXin Li unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1405*67e74705SXin Li Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1406*67e74705SXin Li Value *NewVal = Builder.getInt8(1);
1407*67e74705SXin Li Value *Order = EmitScalarExpr(E->getArg(1));
1408*67e74705SXin Li if (isa<llvm::ConstantInt>(Order)) {
1409*67e74705SXin Li int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1410*67e74705SXin Li AtomicRMWInst *Result = nullptr;
1411*67e74705SXin Li switch (ord) {
1412*67e74705SXin Li case 0: // memory_order_relaxed
1413*67e74705SXin Li default: // invalid order
1414*67e74705SXin Li Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1415*67e74705SXin Li llvm::AtomicOrdering::Monotonic);
1416*67e74705SXin Li break;
1417*67e74705SXin Li case 1: // memory_order_consume
1418*67e74705SXin Li case 2: // memory_order_acquire
1419*67e74705SXin Li Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1420*67e74705SXin Li llvm::AtomicOrdering::Acquire);
1421*67e74705SXin Li break;
1422*67e74705SXin Li case 3: // memory_order_release
1423*67e74705SXin Li Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1424*67e74705SXin Li llvm::AtomicOrdering::Release);
1425*67e74705SXin Li break;
1426*67e74705SXin Li case 4: // memory_order_acq_rel
1427*67e74705SXin Li
1428*67e74705SXin Li Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1429*67e74705SXin Li llvm::AtomicOrdering::AcquireRelease);
1430*67e74705SXin Li break;
1431*67e74705SXin Li case 5: // memory_order_seq_cst
1432*67e74705SXin Li Result = Builder.CreateAtomicRMW(
1433*67e74705SXin Li llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1434*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent);
1435*67e74705SXin Li break;
1436*67e74705SXin Li }
1437*67e74705SXin Li Result->setVolatile(Volatile);
1438*67e74705SXin Li return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1439*67e74705SXin Li }
1440*67e74705SXin Li
1441*67e74705SXin Li llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1442*67e74705SXin Li
1443*67e74705SXin Li llvm::BasicBlock *BBs[5] = {
1444*67e74705SXin Li createBasicBlock("monotonic", CurFn),
1445*67e74705SXin Li createBasicBlock("acquire", CurFn),
1446*67e74705SXin Li createBasicBlock("release", CurFn),
1447*67e74705SXin Li createBasicBlock("acqrel", CurFn),
1448*67e74705SXin Li createBasicBlock("seqcst", CurFn)
1449*67e74705SXin Li };
1450*67e74705SXin Li llvm::AtomicOrdering Orders[5] = {
1451*67e74705SXin Li llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1452*67e74705SXin Li llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1453*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent};
1454*67e74705SXin Li
1455*67e74705SXin Li Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1456*67e74705SXin Li llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1457*67e74705SXin Li
1458*67e74705SXin Li Builder.SetInsertPoint(ContBB);
1459*67e74705SXin Li PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1460*67e74705SXin Li
1461*67e74705SXin Li for (unsigned i = 0; i < 5; ++i) {
1462*67e74705SXin Li Builder.SetInsertPoint(BBs[i]);
1463*67e74705SXin Li AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1464*67e74705SXin Li Ptr, NewVal, Orders[i]);
1465*67e74705SXin Li RMW->setVolatile(Volatile);
1466*67e74705SXin Li Result->addIncoming(RMW, BBs[i]);
1467*67e74705SXin Li Builder.CreateBr(ContBB);
1468*67e74705SXin Li }
1469*67e74705SXin Li
1470*67e74705SXin Li SI->addCase(Builder.getInt32(0), BBs[0]);
1471*67e74705SXin Li SI->addCase(Builder.getInt32(1), BBs[1]);
1472*67e74705SXin Li SI->addCase(Builder.getInt32(2), BBs[1]);
1473*67e74705SXin Li SI->addCase(Builder.getInt32(3), BBs[2]);
1474*67e74705SXin Li SI->addCase(Builder.getInt32(4), BBs[3]);
1475*67e74705SXin Li SI->addCase(Builder.getInt32(5), BBs[4]);
1476*67e74705SXin Li
1477*67e74705SXin Li Builder.SetInsertPoint(ContBB);
1478*67e74705SXin Li return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1479*67e74705SXin Li }
1480*67e74705SXin Li
1481*67e74705SXin Li case Builtin::BI__atomic_clear: {
1482*67e74705SXin Li QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1483*67e74705SXin Li bool Volatile =
1484*67e74705SXin Li PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1485*67e74705SXin Li
1486*67e74705SXin Li Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1487*67e74705SXin Li unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1488*67e74705SXin Li Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1489*67e74705SXin Li Value *NewVal = Builder.getInt8(0);
1490*67e74705SXin Li Value *Order = EmitScalarExpr(E->getArg(1));
1491*67e74705SXin Li if (isa<llvm::ConstantInt>(Order)) {
1492*67e74705SXin Li int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1493*67e74705SXin Li StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1494*67e74705SXin Li switch (ord) {
1495*67e74705SXin Li case 0: // memory_order_relaxed
1496*67e74705SXin Li default: // invalid order
1497*67e74705SXin Li Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1498*67e74705SXin Li break;
1499*67e74705SXin Li case 3: // memory_order_release
1500*67e74705SXin Li Store->setOrdering(llvm::AtomicOrdering::Release);
1501*67e74705SXin Li break;
1502*67e74705SXin Li case 5: // memory_order_seq_cst
1503*67e74705SXin Li Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1504*67e74705SXin Li break;
1505*67e74705SXin Li }
1506*67e74705SXin Li return RValue::get(nullptr);
1507*67e74705SXin Li }
1508*67e74705SXin Li
1509*67e74705SXin Li llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1510*67e74705SXin Li
1511*67e74705SXin Li llvm::BasicBlock *BBs[3] = {
1512*67e74705SXin Li createBasicBlock("monotonic", CurFn),
1513*67e74705SXin Li createBasicBlock("release", CurFn),
1514*67e74705SXin Li createBasicBlock("seqcst", CurFn)
1515*67e74705SXin Li };
1516*67e74705SXin Li llvm::AtomicOrdering Orders[3] = {
1517*67e74705SXin Li llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1518*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent};
1519*67e74705SXin Li
1520*67e74705SXin Li Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1521*67e74705SXin Li llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1522*67e74705SXin Li
1523*67e74705SXin Li for (unsigned i = 0; i < 3; ++i) {
1524*67e74705SXin Li Builder.SetInsertPoint(BBs[i]);
1525*67e74705SXin Li StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1526*67e74705SXin Li Store->setOrdering(Orders[i]);
1527*67e74705SXin Li Builder.CreateBr(ContBB);
1528*67e74705SXin Li }
1529*67e74705SXin Li
1530*67e74705SXin Li SI->addCase(Builder.getInt32(0), BBs[0]);
1531*67e74705SXin Li SI->addCase(Builder.getInt32(3), BBs[1]);
1532*67e74705SXin Li SI->addCase(Builder.getInt32(5), BBs[2]);
1533*67e74705SXin Li
1534*67e74705SXin Li Builder.SetInsertPoint(ContBB);
1535*67e74705SXin Li return RValue::get(nullptr);
1536*67e74705SXin Li }
1537*67e74705SXin Li
1538*67e74705SXin Li case Builtin::BI__atomic_thread_fence:
1539*67e74705SXin Li case Builtin::BI__atomic_signal_fence:
1540*67e74705SXin Li case Builtin::BI__c11_atomic_thread_fence:
1541*67e74705SXin Li case Builtin::BI__c11_atomic_signal_fence: {
1542*67e74705SXin Li llvm::SynchronizationScope Scope;
1543*67e74705SXin Li if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1544*67e74705SXin Li BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1545*67e74705SXin Li Scope = llvm::SingleThread;
1546*67e74705SXin Li else
1547*67e74705SXin Li Scope = llvm::CrossThread;
1548*67e74705SXin Li Value *Order = EmitScalarExpr(E->getArg(0));
1549*67e74705SXin Li if (isa<llvm::ConstantInt>(Order)) {
1550*67e74705SXin Li int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1551*67e74705SXin Li switch (ord) {
1552*67e74705SXin Li case 0: // memory_order_relaxed
1553*67e74705SXin Li default: // invalid order
1554*67e74705SXin Li break;
1555*67e74705SXin Li case 1: // memory_order_consume
1556*67e74705SXin Li case 2: // memory_order_acquire
1557*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1558*67e74705SXin Li break;
1559*67e74705SXin Li case 3: // memory_order_release
1560*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1561*67e74705SXin Li break;
1562*67e74705SXin Li case 4: // memory_order_acq_rel
1563*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1564*67e74705SXin Li break;
1565*67e74705SXin Li case 5: // memory_order_seq_cst
1566*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1567*67e74705SXin Li Scope);
1568*67e74705SXin Li break;
1569*67e74705SXin Li }
1570*67e74705SXin Li return RValue::get(nullptr);
1571*67e74705SXin Li }
1572*67e74705SXin Li
1573*67e74705SXin Li llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1574*67e74705SXin Li AcquireBB = createBasicBlock("acquire", CurFn);
1575*67e74705SXin Li ReleaseBB = createBasicBlock("release", CurFn);
1576*67e74705SXin Li AcqRelBB = createBasicBlock("acqrel", CurFn);
1577*67e74705SXin Li SeqCstBB = createBasicBlock("seqcst", CurFn);
1578*67e74705SXin Li llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1579*67e74705SXin Li
1580*67e74705SXin Li Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1581*67e74705SXin Li llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1582*67e74705SXin Li
1583*67e74705SXin Li Builder.SetInsertPoint(AcquireBB);
1584*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1585*67e74705SXin Li Builder.CreateBr(ContBB);
1586*67e74705SXin Li SI->addCase(Builder.getInt32(1), AcquireBB);
1587*67e74705SXin Li SI->addCase(Builder.getInt32(2), AcquireBB);
1588*67e74705SXin Li
1589*67e74705SXin Li Builder.SetInsertPoint(ReleaseBB);
1590*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1591*67e74705SXin Li Builder.CreateBr(ContBB);
1592*67e74705SXin Li SI->addCase(Builder.getInt32(3), ReleaseBB);
1593*67e74705SXin Li
1594*67e74705SXin Li Builder.SetInsertPoint(AcqRelBB);
1595*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1596*67e74705SXin Li Builder.CreateBr(ContBB);
1597*67e74705SXin Li SI->addCase(Builder.getInt32(4), AcqRelBB);
1598*67e74705SXin Li
1599*67e74705SXin Li Builder.SetInsertPoint(SeqCstBB);
1600*67e74705SXin Li Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1601*67e74705SXin Li Builder.CreateBr(ContBB);
1602*67e74705SXin Li SI->addCase(Builder.getInt32(5), SeqCstBB);
1603*67e74705SXin Li
1604*67e74705SXin Li Builder.SetInsertPoint(ContBB);
1605*67e74705SXin Li return RValue::get(nullptr);
1606*67e74705SXin Li }
1607*67e74705SXin Li
1608*67e74705SXin Li // Library functions with special handling.
1609*67e74705SXin Li case Builtin::BIsqrt:
1610*67e74705SXin Li case Builtin::BIsqrtf:
1611*67e74705SXin Li case Builtin::BIsqrtl: {
1612*67e74705SXin Li // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1613*67e74705SXin Li // in finite- or unsafe-math mode (the intrinsic has different semantics
1614*67e74705SXin Li // for handling negative numbers compared to the library function, so
1615*67e74705SXin Li // -fno-math-errno is not enough).
1616*67e74705SXin Li if (!FD->hasAttr<ConstAttr>())
1617*67e74705SXin Li break;
1618*67e74705SXin Li if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1619*67e74705SXin Li CGM.getCodeGenOpts().NoNaNsFPMath))
1620*67e74705SXin Li break;
1621*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0));
1622*67e74705SXin Li llvm::Type *ArgType = Arg0->getType();
1623*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1624*67e74705SXin Li return RValue::get(Builder.CreateCall(F, Arg0));
1625*67e74705SXin Li }
1626*67e74705SXin Li
1627*67e74705SXin Li case Builtin::BI__builtin_pow:
1628*67e74705SXin Li case Builtin::BI__builtin_powf:
1629*67e74705SXin Li case Builtin::BI__builtin_powl:
1630*67e74705SXin Li case Builtin::BIpow:
1631*67e74705SXin Li case Builtin::BIpowf:
1632*67e74705SXin Li case Builtin::BIpowl: {
1633*67e74705SXin Li // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1634*67e74705SXin Li if (!FD->hasAttr<ConstAttr>())
1635*67e74705SXin Li break;
1636*67e74705SXin Li Value *Base = EmitScalarExpr(E->getArg(0));
1637*67e74705SXin Li Value *Exponent = EmitScalarExpr(E->getArg(1));
1638*67e74705SXin Li llvm::Type *ArgType = Base->getType();
1639*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1640*67e74705SXin Li return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1641*67e74705SXin Li }
1642*67e74705SXin Li
1643*67e74705SXin Li case Builtin::BIfma:
1644*67e74705SXin Li case Builtin::BIfmaf:
1645*67e74705SXin Li case Builtin::BIfmal:
1646*67e74705SXin Li case Builtin::BI__builtin_fma:
1647*67e74705SXin Li case Builtin::BI__builtin_fmaf:
1648*67e74705SXin Li case Builtin::BI__builtin_fmal: {
1649*67e74705SXin Li // Rewrite fma to intrinsic.
1650*67e74705SXin Li Value *FirstArg = EmitScalarExpr(E->getArg(0));
1651*67e74705SXin Li llvm::Type *ArgType = FirstArg->getType();
1652*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1653*67e74705SXin Li return RValue::get(
1654*67e74705SXin Li Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1655*67e74705SXin Li EmitScalarExpr(E->getArg(2))}));
1656*67e74705SXin Li }
1657*67e74705SXin Li
1658*67e74705SXin Li case Builtin::BI__builtin_signbit:
1659*67e74705SXin Li case Builtin::BI__builtin_signbitf:
1660*67e74705SXin Li case Builtin::BI__builtin_signbitl: {
1661*67e74705SXin Li return RValue::get(
1662*67e74705SXin Li Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1663*67e74705SXin Li ConvertType(E->getType())));
1664*67e74705SXin Li }
1665*67e74705SXin Li case Builtin::BI__builtin_annotation: {
1666*67e74705SXin Li llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1667*67e74705SXin Li llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1668*67e74705SXin Li AnnVal->getType());
1669*67e74705SXin Li
1670*67e74705SXin Li // Get the annotation string, go through casts. Sema requires this to be a
1671*67e74705SXin Li // non-wide string literal, potentially casted, so the cast<> is safe.
1672*67e74705SXin Li const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1673*67e74705SXin Li StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1674*67e74705SXin Li return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1675*67e74705SXin Li }
1676*67e74705SXin Li case Builtin::BI__builtin_addcb:
1677*67e74705SXin Li case Builtin::BI__builtin_addcs:
1678*67e74705SXin Li case Builtin::BI__builtin_addc:
1679*67e74705SXin Li case Builtin::BI__builtin_addcl:
1680*67e74705SXin Li case Builtin::BI__builtin_addcll:
1681*67e74705SXin Li case Builtin::BI__builtin_subcb:
1682*67e74705SXin Li case Builtin::BI__builtin_subcs:
1683*67e74705SXin Li case Builtin::BI__builtin_subc:
1684*67e74705SXin Li case Builtin::BI__builtin_subcl:
1685*67e74705SXin Li case Builtin::BI__builtin_subcll: {
1686*67e74705SXin Li
1687*67e74705SXin Li // We translate all of these builtins from expressions of the form:
1688*67e74705SXin Li // int x = ..., y = ..., carryin = ..., carryout, result;
1689*67e74705SXin Li // result = __builtin_addc(x, y, carryin, &carryout);
1690*67e74705SXin Li //
1691*67e74705SXin Li // to LLVM IR of the form:
1692*67e74705SXin Li //
1693*67e74705SXin Li // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1694*67e74705SXin Li // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1695*67e74705SXin Li // %carry1 = extractvalue {i32, i1} %tmp1, 1
1696*67e74705SXin Li // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1697*67e74705SXin Li // i32 %carryin)
1698*67e74705SXin Li // %result = extractvalue {i32, i1} %tmp2, 0
1699*67e74705SXin Li // %carry2 = extractvalue {i32, i1} %tmp2, 1
1700*67e74705SXin Li // %tmp3 = or i1 %carry1, %carry2
1701*67e74705SXin Li // %tmp4 = zext i1 %tmp3 to i32
1702*67e74705SXin Li // store i32 %tmp4, i32* %carryout
1703*67e74705SXin Li
1704*67e74705SXin Li // Scalarize our inputs.
1705*67e74705SXin Li llvm::Value *X = EmitScalarExpr(E->getArg(0));
1706*67e74705SXin Li llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1707*67e74705SXin Li llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1708*67e74705SXin Li Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1709*67e74705SXin Li
1710*67e74705SXin Li // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1711*67e74705SXin Li llvm::Intrinsic::ID IntrinsicId;
1712*67e74705SXin Li switch (BuiltinID) {
1713*67e74705SXin Li default: llvm_unreachable("Unknown multiprecision builtin id.");
1714*67e74705SXin Li case Builtin::BI__builtin_addcb:
1715*67e74705SXin Li case Builtin::BI__builtin_addcs:
1716*67e74705SXin Li case Builtin::BI__builtin_addc:
1717*67e74705SXin Li case Builtin::BI__builtin_addcl:
1718*67e74705SXin Li case Builtin::BI__builtin_addcll:
1719*67e74705SXin Li IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1720*67e74705SXin Li break;
1721*67e74705SXin Li case Builtin::BI__builtin_subcb:
1722*67e74705SXin Li case Builtin::BI__builtin_subcs:
1723*67e74705SXin Li case Builtin::BI__builtin_subc:
1724*67e74705SXin Li case Builtin::BI__builtin_subcl:
1725*67e74705SXin Li case Builtin::BI__builtin_subcll:
1726*67e74705SXin Li IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1727*67e74705SXin Li break;
1728*67e74705SXin Li }
1729*67e74705SXin Li
1730*67e74705SXin Li // Construct our resulting LLVM IR expression.
1731*67e74705SXin Li llvm::Value *Carry1;
1732*67e74705SXin Li llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1733*67e74705SXin Li X, Y, Carry1);
1734*67e74705SXin Li llvm::Value *Carry2;
1735*67e74705SXin Li llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1736*67e74705SXin Li Sum1, Carryin, Carry2);
1737*67e74705SXin Li llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1738*67e74705SXin Li X->getType());
1739*67e74705SXin Li Builder.CreateStore(CarryOut, CarryOutPtr);
1740*67e74705SXin Li return RValue::get(Sum2);
1741*67e74705SXin Li }
1742*67e74705SXin Li
1743*67e74705SXin Li case Builtin::BI__builtin_add_overflow:
1744*67e74705SXin Li case Builtin::BI__builtin_sub_overflow:
1745*67e74705SXin Li case Builtin::BI__builtin_mul_overflow: {
1746*67e74705SXin Li const clang::Expr *LeftArg = E->getArg(0);
1747*67e74705SXin Li const clang::Expr *RightArg = E->getArg(1);
1748*67e74705SXin Li const clang::Expr *ResultArg = E->getArg(2);
1749*67e74705SXin Li
1750*67e74705SXin Li clang::QualType ResultQTy =
1751*67e74705SXin Li ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1752*67e74705SXin Li
1753*67e74705SXin Li WidthAndSignedness LeftInfo =
1754*67e74705SXin Li getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1755*67e74705SXin Li WidthAndSignedness RightInfo =
1756*67e74705SXin Li getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1757*67e74705SXin Li WidthAndSignedness ResultInfo =
1758*67e74705SXin Li getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1759*67e74705SXin Li WidthAndSignedness EncompassingInfo =
1760*67e74705SXin Li EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1761*67e74705SXin Li
1762*67e74705SXin Li llvm::Type *EncompassingLLVMTy =
1763*67e74705SXin Li llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1764*67e74705SXin Li
1765*67e74705SXin Li llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1766*67e74705SXin Li
1767*67e74705SXin Li llvm::Intrinsic::ID IntrinsicId;
1768*67e74705SXin Li switch (BuiltinID) {
1769*67e74705SXin Li default:
1770*67e74705SXin Li llvm_unreachable("Unknown overflow builtin id.");
1771*67e74705SXin Li case Builtin::BI__builtin_add_overflow:
1772*67e74705SXin Li IntrinsicId = EncompassingInfo.Signed
1773*67e74705SXin Li ? llvm::Intrinsic::sadd_with_overflow
1774*67e74705SXin Li : llvm::Intrinsic::uadd_with_overflow;
1775*67e74705SXin Li break;
1776*67e74705SXin Li case Builtin::BI__builtin_sub_overflow:
1777*67e74705SXin Li IntrinsicId = EncompassingInfo.Signed
1778*67e74705SXin Li ? llvm::Intrinsic::ssub_with_overflow
1779*67e74705SXin Li : llvm::Intrinsic::usub_with_overflow;
1780*67e74705SXin Li break;
1781*67e74705SXin Li case Builtin::BI__builtin_mul_overflow:
1782*67e74705SXin Li IntrinsicId = EncompassingInfo.Signed
1783*67e74705SXin Li ? llvm::Intrinsic::smul_with_overflow
1784*67e74705SXin Li : llvm::Intrinsic::umul_with_overflow;
1785*67e74705SXin Li break;
1786*67e74705SXin Li }
1787*67e74705SXin Li
1788*67e74705SXin Li llvm::Value *Left = EmitScalarExpr(LeftArg);
1789*67e74705SXin Li llvm::Value *Right = EmitScalarExpr(RightArg);
1790*67e74705SXin Li Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1791*67e74705SXin Li
1792*67e74705SXin Li // Extend each operand to the encompassing type.
1793*67e74705SXin Li Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1794*67e74705SXin Li Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1795*67e74705SXin Li
1796*67e74705SXin Li // Perform the operation on the extended values.
1797*67e74705SXin Li llvm::Value *Overflow, *Result;
1798*67e74705SXin Li Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1799*67e74705SXin Li
1800*67e74705SXin Li if (EncompassingInfo.Width > ResultInfo.Width) {
1801*67e74705SXin Li // The encompassing type is wider than the result type, so we need to
1802*67e74705SXin Li // truncate it.
1803*67e74705SXin Li llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1804*67e74705SXin Li
1805*67e74705SXin Li // To see if the truncation caused an overflow, we will extend
1806*67e74705SXin Li // the result and then compare it to the original result.
1807*67e74705SXin Li llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1808*67e74705SXin Li ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1809*67e74705SXin Li llvm::Value *TruncationOverflow =
1810*67e74705SXin Li Builder.CreateICmpNE(Result, ResultTruncExt);
1811*67e74705SXin Li
1812*67e74705SXin Li Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1813*67e74705SXin Li Result = ResultTrunc;
1814*67e74705SXin Li }
1815*67e74705SXin Li
1816*67e74705SXin Li // Finally, store the result using the pointer.
1817*67e74705SXin Li bool isVolatile =
1818*67e74705SXin Li ResultArg->getType()->getPointeeType().isVolatileQualified();
1819*67e74705SXin Li Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1820*67e74705SXin Li
1821*67e74705SXin Li return RValue::get(Overflow);
1822*67e74705SXin Li }
1823*67e74705SXin Li
1824*67e74705SXin Li case Builtin::BI__builtin_uadd_overflow:
1825*67e74705SXin Li case Builtin::BI__builtin_uaddl_overflow:
1826*67e74705SXin Li case Builtin::BI__builtin_uaddll_overflow:
1827*67e74705SXin Li case Builtin::BI__builtin_usub_overflow:
1828*67e74705SXin Li case Builtin::BI__builtin_usubl_overflow:
1829*67e74705SXin Li case Builtin::BI__builtin_usubll_overflow:
1830*67e74705SXin Li case Builtin::BI__builtin_umul_overflow:
1831*67e74705SXin Li case Builtin::BI__builtin_umull_overflow:
1832*67e74705SXin Li case Builtin::BI__builtin_umulll_overflow:
1833*67e74705SXin Li case Builtin::BI__builtin_sadd_overflow:
1834*67e74705SXin Li case Builtin::BI__builtin_saddl_overflow:
1835*67e74705SXin Li case Builtin::BI__builtin_saddll_overflow:
1836*67e74705SXin Li case Builtin::BI__builtin_ssub_overflow:
1837*67e74705SXin Li case Builtin::BI__builtin_ssubl_overflow:
1838*67e74705SXin Li case Builtin::BI__builtin_ssubll_overflow:
1839*67e74705SXin Li case Builtin::BI__builtin_smul_overflow:
1840*67e74705SXin Li case Builtin::BI__builtin_smull_overflow:
1841*67e74705SXin Li case Builtin::BI__builtin_smulll_overflow: {
1842*67e74705SXin Li
1843*67e74705SXin Li // We translate all of these builtins directly to the relevant llvm IR node.
1844*67e74705SXin Li
1845*67e74705SXin Li // Scalarize our inputs.
1846*67e74705SXin Li llvm::Value *X = EmitScalarExpr(E->getArg(0));
1847*67e74705SXin Li llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1848*67e74705SXin Li Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1849*67e74705SXin Li
1850*67e74705SXin Li // Decide which of the overflow intrinsics we are lowering to:
1851*67e74705SXin Li llvm::Intrinsic::ID IntrinsicId;
1852*67e74705SXin Li switch (BuiltinID) {
1853*67e74705SXin Li default: llvm_unreachable("Unknown overflow builtin id.");
1854*67e74705SXin Li case Builtin::BI__builtin_uadd_overflow:
1855*67e74705SXin Li case Builtin::BI__builtin_uaddl_overflow:
1856*67e74705SXin Li case Builtin::BI__builtin_uaddll_overflow:
1857*67e74705SXin Li IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1858*67e74705SXin Li break;
1859*67e74705SXin Li case Builtin::BI__builtin_usub_overflow:
1860*67e74705SXin Li case Builtin::BI__builtin_usubl_overflow:
1861*67e74705SXin Li case Builtin::BI__builtin_usubll_overflow:
1862*67e74705SXin Li IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1863*67e74705SXin Li break;
1864*67e74705SXin Li case Builtin::BI__builtin_umul_overflow:
1865*67e74705SXin Li case Builtin::BI__builtin_umull_overflow:
1866*67e74705SXin Li case Builtin::BI__builtin_umulll_overflow:
1867*67e74705SXin Li IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1868*67e74705SXin Li break;
1869*67e74705SXin Li case Builtin::BI__builtin_sadd_overflow:
1870*67e74705SXin Li case Builtin::BI__builtin_saddl_overflow:
1871*67e74705SXin Li case Builtin::BI__builtin_saddll_overflow:
1872*67e74705SXin Li IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1873*67e74705SXin Li break;
1874*67e74705SXin Li case Builtin::BI__builtin_ssub_overflow:
1875*67e74705SXin Li case Builtin::BI__builtin_ssubl_overflow:
1876*67e74705SXin Li case Builtin::BI__builtin_ssubll_overflow:
1877*67e74705SXin Li IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1878*67e74705SXin Li break;
1879*67e74705SXin Li case Builtin::BI__builtin_smul_overflow:
1880*67e74705SXin Li case Builtin::BI__builtin_smull_overflow:
1881*67e74705SXin Li case Builtin::BI__builtin_smulll_overflow:
1882*67e74705SXin Li IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1883*67e74705SXin Li break;
1884*67e74705SXin Li }
1885*67e74705SXin Li
1886*67e74705SXin Li
1887*67e74705SXin Li llvm::Value *Carry;
1888*67e74705SXin Li llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1889*67e74705SXin Li Builder.CreateStore(Sum, SumOutPtr);
1890*67e74705SXin Li
1891*67e74705SXin Li return RValue::get(Carry);
1892*67e74705SXin Li }
1893*67e74705SXin Li case Builtin::BI__builtin_addressof:
1894*67e74705SXin Li return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1895*67e74705SXin Li case Builtin::BI__builtin_operator_new:
1896*67e74705SXin Li return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1897*67e74705SXin Li E->getArg(0), false);
1898*67e74705SXin Li case Builtin::BI__builtin_operator_delete:
1899*67e74705SXin Li return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1900*67e74705SXin Li E->getArg(0), true);
1901*67e74705SXin Li case Builtin::BI__noop:
1902*67e74705SXin Li // __noop always evaluates to an integer literal zero.
1903*67e74705SXin Li return RValue::get(ConstantInt::get(IntTy, 0));
1904*67e74705SXin Li case Builtin::BI__builtin_call_with_static_chain: {
1905*67e74705SXin Li const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1906*67e74705SXin Li const Expr *Chain = E->getArg(1);
1907*67e74705SXin Li return EmitCall(Call->getCallee()->getType(),
1908*67e74705SXin Li EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1909*67e74705SXin Li Call->getCalleeDecl(), EmitScalarExpr(Chain));
1910*67e74705SXin Li }
1911*67e74705SXin Li case Builtin::BI_InterlockedExchange:
1912*67e74705SXin Li case Builtin::BI_InterlockedExchangePointer:
1913*67e74705SXin Li return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1914*67e74705SXin Li case Builtin::BI_InterlockedCompareExchangePointer: {
1915*67e74705SXin Li llvm::Type *RTy;
1916*67e74705SXin Li llvm::IntegerType *IntType =
1917*67e74705SXin Li IntegerType::get(getLLVMContext(),
1918*67e74705SXin Li getContext().getTypeSize(E->getType()));
1919*67e74705SXin Li llvm::Type *IntPtrType = IntType->getPointerTo();
1920*67e74705SXin Li
1921*67e74705SXin Li llvm::Value *Destination =
1922*67e74705SXin Li Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1923*67e74705SXin Li
1924*67e74705SXin Li llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1925*67e74705SXin Li RTy = Exchange->getType();
1926*67e74705SXin Li Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1927*67e74705SXin Li
1928*67e74705SXin Li llvm::Value *Comparand =
1929*67e74705SXin Li Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1930*67e74705SXin Li
1931*67e74705SXin Li auto Result =
1932*67e74705SXin Li Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1933*67e74705SXin Li AtomicOrdering::SequentiallyConsistent,
1934*67e74705SXin Li AtomicOrdering::SequentiallyConsistent);
1935*67e74705SXin Li Result->setVolatile(true);
1936*67e74705SXin Li
1937*67e74705SXin Li return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1938*67e74705SXin Li 0),
1939*67e74705SXin Li RTy));
1940*67e74705SXin Li }
1941*67e74705SXin Li case Builtin::BI_InterlockedCompareExchange: {
1942*67e74705SXin Li AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1943*67e74705SXin Li EmitScalarExpr(E->getArg(0)),
1944*67e74705SXin Li EmitScalarExpr(E->getArg(2)),
1945*67e74705SXin Li EmitScalarExpr(E->getArg(1)),
1946*67e74705SXin Li AtomicOrdering::SequentiallyConsistent,
1947*67e74705SXin Li AtomicOrdering::SequentiallyConsistent);
1948*67e74705SXin Li CXI->setVolatile(true);
1949*67e74705SXin Li return RValue::get(Builder.CreateExtractValue(CXI, 0));
1950*67e74705SXin Li }
1951*67e74705SXin Li case Builtin::BI_InterlockedIncrement: {
1952*67e74705SXin Li llvm::Type *IntTy = ConvertType(E->getType());
1953*67e74705SXin Li AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1954*67e74705SXin Li AtomicRMWInst::Add,
1955*67e74705SXin Li EmitScalarExpr(E->getArg(0)),
1956*67e74705SXin Li ConstantInt::get(IntTy, 1),
1957*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent);
1958*67e74705SXin Li RMWI->setVolatile(true);
1959*67e74705SXin Li return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
1960*67e74705SXin Li }
1961*67e74705SXin Li case Builtin::BI_InterlockedDecrement: {
1962*67e74705SXin Li llvm::Type *IntTy = ConvertType(E->getType());
1963*67e74705SXin Li AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1964*67e74705SXin Li AtomicRMWInst::Sub,
1965*67e74705SXin Li EmitScalarExpr(E->getArg(0)),
1966*67e74705SXin Li ConstantInt::get(IntTy, 1),
1967*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent);
1968*67e74705SXin Li RMWI->setVolatile(true);
1969*67e74705SXin Li return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
1970*67e74705SXin Li }
1971*67e74705SXin Li case Builtin::BI_InterlockedExchangeAdd: {
1972*67e74705SXin Li AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1973*67e74705SXin Li AtomicRMWInst::Add,
1974*67e74705SXin Li EmitScalarExpr(E->getArg(0)),
1975*67e74705SXin Li EmitScalarExpr(E->getArg(1)),
1976*67e74705SXin Li llvm::AtomicOrdering::SequentiallyConsistent);
1977*67e74705SXin Li RMWI->setVolatile(true);
1978*67e74705SXin Li return RValue::get(RMWI);
1979*67e74705SXin Li }
1980*67e74705SXin Li case Builtin::BI__readfsdword: {
1981*67e74705SXin Li llvm::Type *IntTy = ConvertType(E->getType());
1982*67e74705SXin Li Value *IntToPtr =
1983*67e74705SXin Li Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1984*67e74705SXin Li llvm::PointerType::get(IntTy, 257));
1985*67e74705SXin Li LoadInst *Load =
1986*67e74705SXin Li Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
1987*67e74705SXin Li return RValue::get(Load);
1988*67e74705SXin Li }
1989*67e74705SXin Li
1990*67e74705SXin Li case Builtin::BI__exception_code:
1991*67e74705SXin Li case Builtin::BI_exception_code:
1992*67e74705SXin Li return RValue::get(EmitSEHExceptionCode());
1993*67e74705SXin Li case Builtin::BI__exception_info:
1994*67e74705SXin Li case Builtin::BI_exception_info:
1995*67e74705SXin Li return RValue::get(EmitSEHExceptionInfo());
1996*67e74705SXin Li case Builtin::BI__abnormal_termination:
1997*67e74705SXin Li case Builtin::BI_abnormal_termination:
1998*67e74705SXin Li return RValue::get(EmitSEHAbnormalTermination());
1999*67e74705SXin Li case Builtin::BI_setjmpex: {
2000*67e74705SXin Li if (getTarget().getTriple().isOSMSVCRT()) {
2001*67e74705SXin Li llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2002*67e74705SXin Li llvm::AttributeSet ReturnsTwiceAttr =
2003*67e74705SXin Li AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2004*67e74705SXin Li llvm::Attribute::ReturnsTwice);
2005*67e74705SXin Li llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2006*67e74705SXin Li llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2007*67e74705SXin Li "_setjmpex", ReturnsTwiceAttr);
2008*67e74705SXin Li llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2009*67e74705SXin Li EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2010*67e74705SXin Li llvm::Value *FrameAddr =
2011*67e74705SXin Li Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2012*67e74705SXin Li ConstantInt::get(Int32Ty, 0));
2013*67e74705SXin Li llvm::Value *Args[] = {Buf, FrameAddr};
2014*67e74705SXin Li llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2015*67e74705SXin Li CS.setAttributes(ReturnsTwiceAttr);
2016*67e74705SXin Li return RValue::get(CS.getInstruction());
2017*67e74705SXin Li }
2018*67e74705SXin Li break;
2019*67e74705SXin Li }
2020*67e74705SXin Li case Builtin::BI_setjmp: {
2021*67e74705SXin Li if (getTarget().getTriple().isOSMSVCRT()) {
2022*67e74705SXin Li llvm::AttributeSet ReturnsTwiceAttr =
2023*67e74705SXin Li AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2024*67e74705SXin Li llvm::Attribute::ReturnsTwice);
2025*67e74705SXin Li llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2026*67e74705SXin Li EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2027*67e74705SXin Li llvm::CallSite CS;
2028*67e74705SXin Li if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2029*67e74705SXin Li llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2030*67e74705SXin Li llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2031*67e74705SXin Li llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2032*67e74705SXin Li "_setjmp3", ReturnsTwiceAttr);
2033*67e74705SXin Li llvm::Value *Count = ConstantInt::get(IntTy, 0);
2034*67e74705SXin Li llvm::Value *Args[] = {Buf, Count};
2035*67e74705SXin Li CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2036*67e74705SXin Li } else {
2037*67e74705SXin Li llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2038*67e74705SXin Li llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2039*67e74705SXin Li llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2040*67e74705SXin Li "_setjmp", ReturnsTwiceAttr);
2041*67e74705SXin Li llvm::Value *FrameAddr =
2042*67e74705SXin Li Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2043*67e74705SXin Li ConstantInt::get(Int32Ty, 0));
2044*67e74705SXin Li llvm::Value *Args[] = {Buf, FrameAddr};
2045*67e74705SXin Li CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2046*67e74705SXin Li }
2047*67e74705SXin Li CS.setAttributes(ReturnsTwiceAttr);
2048*67e74705SXin Li return RValue::get(CS.getInstruction());
2049*67e74705SXin Li }
2050*67e74705SXin Li break;
2051*67e74705SXin Li }
2052*67e74705SXin Li
2053*67e74705SXin Li case Builtin::BI__GetExceptionInfo: {
2054*67e74705SXin Li if (llvm::GlobalVariable *GV =
2055*67e74705SXin Li CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2056*67e74705SXin Li return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2057*67e74705SXin Li break;
2058*67e74705SXin Li }
2059*67e74705SXin Li
2060*67e74705SXin Li // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2061*67e74705SXin Li case Builtin::BIread_pipe:
2062*67e74705SXin Li case Builtin::BIwrite_pipe: {
2063*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2064*67e74705SXin Li *Arg1 = EmitScalarExpr(E->getArg(1));
2065*67e74705SXin Li
2066*67e74705SXin Li // Type of the generic packet parameter.
2067*67e74705SXin Li unsigned GenericAS =
2068*67e74705SXin Li getContext().getTargetAddressSpace(LangAS::opencl_generic);
2069*67e74705SXin Li llvm::Type *I8PTy = llvm::PointerType::get(
2070*67e74705SXin Li llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2071*67e74705SXin Li
2072*67e74705SXin Li // Testing which overloaded version we should generate the call for.
2073*67e74705SXin Li if (2U == E->getNumArgs()) {
2074*67e74705SXin Li const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2075*67e74705SXin Li : "__write_pipe_2";
2076*67e74705SXin Li // Creating a generic function type to be able to call with any builtin or
2077*67e74705SXin Li // user defined type.
2078*67e74705SXin Li llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
2079*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2080*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2081*67e74705SXin Li Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2082*67e74705SXin Li return RValue::get(Builder.CreateCall(
2083*67e74705SXin Li CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
2084*67e74705SXin Li } else {
2085*67e74705SXin Li assert(4 == E->getNumArgs() &&
2086*67e74705SXin Li "Illegal number of parameters to pipe function");
2087*67e74705SXin Li const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2088*67e74705SXin Li : "__write_pipe_4";
2089*67e74705SXin Li
2090*67e74705SXin Li llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
2091*67e74705SXin Li Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2092*67e74705SXin Li *Arg3 = EmitScalarExpr(E->getArg(3));
2093*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2094*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2095*67e74705SXin Li Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2096*67e74705SXin Li // We know the third argument is an integer type, but we may need to cast
2097*67e74705SXin Li // it to i32.
2098*67e74705SXin Li if (Arg2->getType() != Int32Ty)
2099*67e74705SXin Li Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2100*67e74705SXin Li return RValue::get(Builder.CreateCall(
2101*67e74705SXin Li CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
2102*67e74705SXin Li }
2103*67e74705SXin Li }
2104*67e74705SXin Li // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2105*67e74705SXin Li // functions
2106*67e74705SXin Li case Builtin::BIreserve_read_pipe:
2107*67e74705SXin Li case Builtin::BIreserve_write_pipe:
2108*67e74705SXin Li case Builtin::BIwork_group_reserve_read_pipe:
2109*67e74705SXin Li case Builtin::BIwork_group_reserve_write_pipe:
2110*67e74705SXin Li case Builtin::BIsub_group_reserve_read_pipe:
2111*67e74705SXin Li case Builtin::BIsub_group_reserve_write_pipe: {
2112*67e74705SXin Li // Composing the mangled name for the function.
2113*67e74705SXin Li const char *Name;
2114*67e74705SXin Li if (BuiltinID == Builtin::BIreserve_read_pipe)
2115*67e74705SXin Li Name = "__reserve_read_pipe";
2116*67e74705SXin Li else if (BuiltinID == Builtin::BIreserve_write_pipe)
2117*67e74705SXin Li Name = "__reserve_write_pipe";
2118*67e74705SXin Li else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2119*67e74705SXin Li Name = "__work_group_reserve_read_pipe";
2120*67e74705SXin Li else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2121*67e74705SXin Li Name = "__work_group_reserve_write_pipe";
2122*67e74705SXin Li else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2123*67e74705SXin Li Name = "__sub_group_reserve_read_pipe";
2124*67e74705SXin Li else
2125*67e74705SXin Li Name = "__sub_group_reserve_write_pipe";
2126*67e74705SXin Li
2127*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2128*67e74705SXin Li *Arg1 = EmitScalarExpr(E->getArg(1));
2129*67e74705SXin Li llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2130*67e74705SXin Li
2131*67e74705SXin Li // Building the generic function prototype.
2132*67e74705SXin Li llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
2133*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2134*67e74705SXin Li ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2135*67e74705SXin Li // We know the second argument is an integer type, but we may need to cast
2136*67e74705SXin Li // it to i32.
2137*67e74705SXin Li if (Arg1->getType() != Int32Ty)
2138*67e74705SXin Li Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2139*67e74705SXin Li return RValue::get(
2140*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2141*67e74705SXin Li }
2142*67e74705SXin Li // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2143*67e74705SXin Li // functions
2144*67e74705SXin Li case Builtin::BIcommit_read_pipe:
2145*67e74705SXin Li case Builtin::BIcommit_write_pipe:
2146*67e74705SXin Li case Builtin::BIwork_group_commit_read_pipe:
2147*67e74705SXin Li case Builtin::BIwork_group_commit_write_pipe:
2148*67e74705SXin Li case Builtin::BIsub_group_commit_read_pipe:
2149*67e74705SXin Li case Builtin::BIsub_group_commit_write_pipe: {
2150*67e74705SXin Li const char *Name;
2151*67e74705SXin Li if (BuiltinID == Builtin::BIcommit_read_pipe)
2152*67e74705SXin Li Name = "__commit_read_pipe";
2153*67e74705SXin Li else if (BuiltinID == Builtin::BIcommit_write_pipe)
2154*67e74705SXin Li Name = "__commit_write_pipe";
2155*67e74705SXin Li else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2156*67e74705SXin Li Name = "__work_group_commit_read_pipe";
2157*67e74705SXin Li else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2158*67e74705SXin Li Name = "__work_group_commit_write_pipe";
2159*67e74705SXin Li else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2160*67e74705SXin Li Name = "__sub_group_commit_read_pipe";
2161*67e74705SXin Li else
2162*67e74705SXin Li Name = "__sub_group_commit_write_pipe";
2163*67e74705SXin Li
2164*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2165*67e74705SXin Li *Arg1 = EmitScalarExpr(E->getArg(1));
2166*67e74705SXin Li
2167*67e74705SXin Li // Building the generic function prototype.
2168*67e74705SXin Li llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
2169*67e74705SXin Li llvm::FunctionType *FTy =
2170*67e74705SXin Li llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2171*67e74705SXin Li llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2172*67e74705SXin Li
2173*67e74705SXin Li return RValue::get(
2174*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2175*67e74705SXin Li }
2176*67e74705SXin Li // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2177*67e74705SXin Li case Builtin::BIget_pipe_num_packets:
2178*67e74705SXin Li case Builtin::BIget_pipe_max_packets: {
2179*67e74705SXin Li const char *Name;
2180*67e74705SXin Li if (BuiltinID == Builtin::BIget_pipe_num_packets)
2181*67e74705SXin Li Name = "__get_pipe_num_packets";
2182*67e74705SXin Li else
2183*67e74705SXin Li Name = "__get_pipe_max_packets";
2184*67e74705SXin Li
2185*67e74705SXin Li // Building the generic function prototype.
2186*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0));
2187*67e74705SXin Li llvm::Type *ArgTys[] = {Arg0->getType()};
2188*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2189*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2190*67e74705SXin Li
2191*67e74705SXin Li return RValue::get(
2192*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
2193*67e74705SXin Li }
2194*67e74705SXin Li
2195*67e74705SXin Li // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2196*67e74705SXin Li case Builtin::BIto_global:
2197*67e74705SXin Li case Builtin::BIto_local:
2198*67e74705SXin Li case Builtin::BIto_private: {
2199*67e74705SXin Li auto Arg0 = EmitScalarExpr(E->getArg(0));
2200*67e74705SXin Li auto NewArgT = llvm::PointerType::get(Int8Ty,
2201*67e74705SXin Li CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2202*67e74705SXin Li auto NewRetT = llvm::PointerType::get(Int8Ty,
2203*67e74705SXin Li CGM.getContext().getTargetAddressSpace(
2204*67e74705SXin Li E->getType()->getPointeeType().getAddressSpace()));
2205*67e74705SXin Li auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2206*67e74705SXin Li llvm::Value *NewArg;
2207*67e74705SXin Li if (Arg0->getType()->getPointerAddressSpace() !=
2208*67e74705SXin Li NewArgT->getPointerAddressSpace())
2209*67e74705SXin Li NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2210*67e74705SXin Li else
2211*67e74705SXin Li NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2212*67e74705SXin Li auto NewCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy,
2213*67e74705SXin Li E->getDirectCallee()->getName()), {NewArg});
2214*67e74705SXin Li return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2215*67e74705SXin Li ConvertType(E->getType())));
2216*67e74705SXin Li }
2217*67e74705SXin Li
2218*67e74705SXin Li // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2219*67e74705SXin Li // It contains four different overload formats specified in Table 6.13.17.1.
2220*67e74705SXin Li case Builtin::BIenqueue_kernel: {
2221*67e74705SXin Li StringRef Name; // Generated function call name
2222*67e74705SXin Li unsigned NumArgs = E->getNumArgs();
2223*67e74705SXin Li
2224*67e74705SXin Li llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2225*67e74705SXin Li llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2226*67e74705SXin Li
2227*67e74705SXin Li llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2228*67e74705SXin Li llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2229*67e74705SXin Li llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2230*67e74705SXin Li
2231*67e74705SXin Li if (NumArgs == 4) {
2232*67e74705SXin Li // The most basic form of the call with parameters:
2233*67e74705SXin Li // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2234*67e74705SXin Li Name = "__enqueue_kernel_basic";
2235*67e74705SXin Li llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2236*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2237*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2238*67e74705SXin Li
2239*67e74705SXin Li llvm::Value *Block =
2240*67e74705SXin Li Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2241*67e74705SXin Li
2242*67e74705SXin Li return RValue::get(Builder.CreateCall(
2243*67e74705SXin Li CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2244*67e74705SXin Li }
2245*67e74705SXin Li assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2246*67e74705SXin Li
2247*67e74705SXin Li // Could have events and/or vaargs.
2248*67e74705SXin Li if (E->getArg(3)->getType()->isBlockPointerType()) {
2249*67e74705SXin Li // No events passed, but has variadic arguments.
2250*67e74705SXin Li Name = "__enqueue_kernel_vaargs";
2251*67e74705SXin Li llvm::Value *Block =
2252*67e74705SXin Li Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2253*67e74705SXin Li // Create a vector of the arguments, as well as a constant value to
2254*67e74705SXin Li // express to the runtime the number of variadic arguments.
2255*67e74705SXin Li std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2256*67e74705SXin Li ConstantInt::get(IntTy, NumArgs - 4)};
2257*67e74705SXin Li std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2258*67e74705SXin Li IntTy};
2259*67e74705SXin Li
2260*67e74705SXin Li // Add the variadics.
2261*67e74705SXin Li for (unsigned I = 4; I < NumArgs; ++I) {
2262*67e74705SXin Li llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2263*67e74705SXin Li unsigned TypeSizeInBytes =
2264*67e74705SXin Li getContext()
2265*67e74705SXin Li .getTypeSizeInChars(E->getArg(I)->getType())
2266*67e74705SXin Li .getQuantity();
2267*67e74705SXin Li Args.push_back(TypeSizeInBytes < 4
2268*67e74705SXin Li ? Builder.CreateZExt(ArgSize, Int32Ty)
2269*67e74705SXin Li : ArgSize);
2270*67e74705SXin Li }
2271*67e74705SXin Li
2272*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2273*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2274*67e74705SXin Li return RValue::get(
2275*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2276*67e74705SXin Li llvm::ArrayRef<llvm::Value *>(Args)));
2277*67e74705SXin Li }
2278*67e74705SXin Li // Any calls now have event arguments passed.
2279*67e74705SXin Li if (NumArgs >= 7) {
2280*67e74705SXin Li llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2281*67e74705SXin Li unsigned AS4 =
2282*67e74705SXin Li E->getArg(4)->getType()->isArrayType()
2283*67e74705SXin Li ? E->getArg(4)->getType().getAddressSpace()
2284*67e74705SXin Li : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
2285*67e74705SXin Li llvm::Type *EventPtrAS4Ty =
2286*67e74705SXin Li EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
2287*67e74705SXin Li unsigned AS5 =
2288*67e74705SXin Li E->getArg(5)->getType()->getPointeeType().getAddressSpace();
2289*67e74705SXin Li llvm::Type *EventPtrAS5Ty =
2290*67e74705SXin Li EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
2291*67e74705SXin Li
2292*67e74705SXin Li llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
2293*67e74705SXin Li llvm::Value *EventList =
2294*67e74705SXin Li E->getArg(4)->getType()->isArrayType()
2295*67e74705SXin Li ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2296*67e74705SXin Li : EmitScalarExpr(E->getArg(4));
2297*67e74705SXin Li llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2298*67e74705SXin Li llvm::Value *Block =
2299*67e74705SXin Li Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2300*67e74705SXin Li
2301*67e74705SXin Li std::vector<llvm::Type *> ArgTys = {
2302*67e74705SXin Li QueueTy, Int32Ty, RangeTy, Int32Ty,
2303*67e74705SXin Li EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
2304*67e74705SXin Li std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
2305*67e74705SXin Li EventList, ClkEvent, Block};
2306*67e74705SXin Li
2307*67e74705SXin Li if (NumArgs == 7) {
2308*67e74705SXin Li // Has events but no variadics.
2309*67e74705SXin Li Name = "__enqueue_kernel_basic_events";
2310*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2311*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2312*67e74705SXin Li return RValue::get(
2313*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2314*67e74705SXin Li llvm::ArrayRef<llvm::Value *>(Args)));
2315*67e74705SXin Li }
2316*67e74705SXin Li // Has event info and variadics
2317*67e74705SXin Li // Pass the number of variadics to the runtime function too.
2318*67e74705SXin Li Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2319*67e74705SXin Li ArgTys.push_back(Int32Ty);
2320*67e74705SXin Li Name = "__enqueue_kernel_events_vaargs";
2321*67e74705SXin Li
2322*67e74705SXin Li // Add the variadics.
2323*67e74705SXin Li for (unsigned I = 7; I < NumArgs; ++I) {
2324*67e74705SXin Li llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2325*67e74705SXin Li unsigned TypeSizeInBytes =
2326*67e74705SXin Li getContext()
2327*67e74705SXin Li .getTypeSizeInChars(E->getArg(I)->getType())
2328*67e74705SXin Li .getQuantity();
2329*67e74705SXin Li Args.push_back(TypeSizeInBytes < 4
2330*67e74705SXin Li ? Builder.CreateZExt(ArgSize, Int32Ty)
2331*67e74705SXin Li : ArgSize);
2332*67e74705SXin Li }
2333*67e74705SXin Li llvm::FunctionType *FTy = llvm::FunctionType::get(
2334*67e74705SXin Li Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2335*67e74705SXin Li return RValue::get(
2336*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2337*67e74705SXin Li llvm::ArrayRef<llvm::Value *>(Args)));
2338*67e74705SXin Li }
2339*67e74705SXin Li }
2340*67e74705SXin Li // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2341*67e74705SXin Li // parameter.
2342*67e74705SXin Li case Builtin::BIget_kernel_work_group_size: {
2343*67e74705SXin Li Value *Arg = EmitScalarExpr(E->getArg(0));
2344*67e74705SXin Li Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2345*67e74705SXin Li return RValue::get(
2346*67e74705SXin Li Builder.CreateCall(CGM.CreateRuntimeFunction(
2347*67e74705SXin Li llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2348*67e74705SXin Li "__get_kernel_work_group_size_impl"),
2349*67e74705SXin Li Arg));
2350*67e74705SXin Li }
2351*67e74705SXin Li case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2352*67e74705SXin Li Value *Arg = EmitScalarExpr(E->getArg(0));
2353*67e74705SXin Li Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2354*67e74705SXin Li return RValue::get(Builder.CreateCall(
2355*67e74705SXin Li CGM.CreateRuntimeFunction(
2356*67e74705SXin Li llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2357*67e74705SXin Li "__get_kernel_preferred_work_group_multiple_impl"),
2358*67e74705SXin Li Arg));
2359*67e74705SXin Li }
2360*67e74705SXin Li case Builtin::BIprintf:
2361*67e74705SXin Li if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2362*67e74705SXin Li return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2363*67e74705SXin Li break;
2364*67e74705SXin Li case Builtin::BI__builtin_canonicalize:
2365*67e74705SXin Li case Builtin::BI__builtin_canonicalizef:
2366*67e74705SXin Li case Builtin::BI__builtin_canonicalizel:
2367*67e74705SXin Li return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2368*67e74705SXin Li
2369*67e74705SXin Li case Builtin::BI__builtin_thread_pointer: {
2370*67e74705SXin Li if (!getContext().getTargetInfo().isTLSSupported())
2371*67e74705SXin Li CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2372*67e74705SXin Li // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2373*67e74705SXin Li break;
2374*67e74705SXin Li }
2375*67e74705SXin Li }
2376*67e74705SXin Li
2377*67e74705SXin Li // If this is an alias for a lib function (e.g. __builtin_sin), emit
2378*67e74705SXin Li // the call using the normal call path, but using the unmangled
2379*67e74705SXin Li // version of the function name.
2380*67e74705SXin Li if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2381*67e74705SXin Li return emitLibraryCall(*this, FD, E,
2382*67e74705SXin Li CGM.getBuiltinLibFunction(FD, BuiltinID));
2383*67e74705SXin Li
2384*67e74705SXin Li // If this is a predefined lib function (e.g. malloc), emit the call
2385*67e74705SXin Li // using exactly the normal call path.
2386*67e74705SXin Li if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2387*67e74705SXin Li return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2388*67e74705SXin Li
2389*67e74705SXin Li // Check that a call to a target specific builtin has the correct target
2390*67e74705SXin Li // features.
2391*67e74705SXin Li // This is down here to avoid non-target specific builtins, however, if
2392*67e74705SXin Li // generic builtins start to require generic target features then we
2393*67e74705SXin Li // can move this up to the beginning of the function.
2394*67e74705SXin Li checkTargetFeatures(E, FD);
2395*67e74705SXin Li
2396*67e74705SXin Li // See if we have a target specific intrinsic.
2397*67e74705SXin Li const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2398*67e74705SXin Li Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2399*67e74705SXin Li if (const char *Prefix =
2400*67e74705SXin Li llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
2401*67e74705SXin Li IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
2402*67e74705SXin Li // NOTE we dont need to perform a compatibility flag check here since the
2403*67e74705SXin Li // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
2404*67e74705SXin Li // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
2405*67e74705SXin Li if (IntrinsicID == Intrinsic::not_intrinsic)
2406*67e74705SXin Li IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2407*67e74705SXin Li }
2408*67e74705SXin Li
2409*67e74705SXin Li if (IntrinsicID != Intrinsic::not_intrinsic) {
2410*67e74705SXin Li SmallVector<Value*, 16> Args;
2411*67e74705SXin Li
2412*67e74705SXin Li // Find out if any arguments are required to be integer constant
2413*67e74705SXin Li // expressions.
2414*67e74705SXin Li unsigned ICEArguments = 0;
2415*67e74705SXin Li ASTContext::GetBuiltinTypeError Error;
2416*67e74705SXin Li getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2417*67e74705SXin Li assert(Error == ASTContext::GE_None && "Should not codegen an error");
2418*67e74705SXin Li
2419*67e74705SXin Li Function *F = CGM.getIntrinsic(IntrinsicID);
2420*67e74705SXin Li llvm::FunctionType *FTy = F->getFunctionType();
2421*67e74705SXin Li
2422*67e74705SXin Li for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2423*67e74705SXin Li Value *ArgValue;
2424*67e74705SXin Li // If this is a normal argument, just emit it as a scalar.
2425*67e74705SXin Li if ((ICEArguments & (1 << i)) == 0) {
2426*67e74705SXin Li ArgValue = EmitScalarExpr(E->getArg(i));
2427*67e74705SXin Li } else {
2428*67e74705SXin Li // If this is required to be a constant, constant fold it so that we
2429*67e74705SXin Li // know that the generated intrinsic gets a ConstantInt.
2430*67e74705SXin Li llvm::APSInt Result;
2431*67e74705SXin Li bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2432*67e74705SXin Li assert(IsConst && "Constant arg isn't actually constant?");
2433*67e74705SXin Li (void)IsConst;
2434*67e74705SXin Li ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2435*67e74705SXin Li }
2436*67e74705SXin Li
2437*67e74705SXin Li // If the intrinsic arg type is different from the builtin arg type
2438*67e74705SXin Li // we need to do a bit cast.
2439*67e74705SXin Li llvm::Type *PTy = FTy->getParamType(i);
2440*67e74705SXin Li if (PTy != ArgValue->getType()) {
2441*67e74705SXin Li assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2442*67e74705SXin Li "Must be able to losslessly bit cast to param");
2443*67e74705SXin Li ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2444*67e74705SXin Li }
2445*67e74705SXin Li
2446*67e74705SXin Li Args.push_back(ArgValue);
2447*67e74705SXin Li }
2448*67e74705SXin Li
2449*67e74705SXin Li Value *V = Builder.CreateCall(F, Args);
2450*67e74705SXin Li QualType BuiltinRetType = E->getType();
2451*67e74705SXin Li
2452*67e74705SXin Li llvm::Type *RetTy = VoidTy;
2453*67e74705SXin Li if (!BuiltinRetType->isVoidType())
2454*67e74705SXin Li RetTy = ConvertType(BuiltinRetType);
2455*67e74705SXin Li
2456*67e74705SXin Li if (RetTy != V->getType()) {
2457*67e74705SXin Li assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2458*67e74705SXin Li "Must be able to losslessly bit cast result type");
2459*67e74705SXin Li V = Builder.CreateBitCast(V, RetTy);
2460*67e74705SXin Li }
2461*67e74705SXin Li
2462*67e74705SXin Li return RValue::get(V);
2463*67e74705SXin Li }
2464*67e74705SXin Li
2465*67e74705SXin Li // See if we have a target specific builtin that needs to be lowered.
2466*67e74705SXin Li if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2467*67e74705SXin Li return RValue::get(V);
2468*67e74705SXin Li
2469*67e74705SXin Li ErrorUnsupported(E, "builtin function");
2470*67e74705SXin Li
2471*67e74705SXin Li // Unknown builtin, for now just dump it out and return undef.
2472*67e74705SXin Li return GetUndefRValue(E->getType());
2473*67e74705SXin Li }
2474*67e74705SXin Li
EmitTargetArchBuiltinExpr(CodeGenFunction * CGF,unsigned BuiltinID,const CallExpr * E,llvm::Triple::ArchType Arch)2475*67e74705SXin Li static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2476*67e74705SXin Li unsigned BuiltinID, const CallExpr *E,
2477*67e74705SXin Li llvm::Triple::ArchType Arch) {
2478*67e74705SXin Li switch (Arch) {
2479*67e74705SXin Li case llvm::Triple::arm:
2480*67e74705SXin Li case llvm::Triple::armeb:
2481*67e74705SXin Li case llvm::Triple::thumb:
2482*67e74705SXin Li case llvm::Triple::thumbeb:
2483*67e74705SXin Li return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2484*67e74705SXin Li case llvm::Triple::aarch64:
2485*67e74705SXin Li case llvm::Triple::aarch64_be:
2486*67e74705SXin Li return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2487*67e74705SXin Li case llvm::Triple::x86:
2488*67e74705SXin Li case llvm::Triple::x86_64:
2489*67e74705SXin Li return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2490*67e74705SXin Li case llvm::Triple::ppc:
2491*67e74705SXin Li case llvm::Triple::ppc64:
2492*67e74705SXin Li case llvm::Triple::ppc64le:
2493*67e74705SXin Li return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2494*67e74705SXin Li case llvm::Triple::r600:
2495*67e74705SXin Li case llvm::Triple::amdgcn:
2496*67e74705SXin Li return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2497*67e74705SXin Li case llvm::Triple::systemz:
2498*67e74705SXin Li return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2499*67e74705SXin Li case llvm::Triple::nvptx:
2500*67e74705SXin Li case llvm::Triple::nvptx64:
2501*67e74705SXin Li return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2502*67e74705SXin Li case llvm::Triple::wasm32:
2503*67e74705SXin Li case llvm::Triple::wasm64:
2504*67e74705SXin Li return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2505*67e74705SXin Li default:
2506*67e74705SXin Li return nullptr;
2507*67e74705SXin Li }
2508*67e74705SXin Li }
2509*67e74705SXin Li
EmitTargetBuiltinExpr(unsigned BuiltinID,const CallExpr * E)2510*67e74705SXin Li Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2511*67e74705SXin Li const CallExpr *E) {
2512*67e74705SXin Li if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2513*67e74705SXin Li assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2514*67e74705SXin Li return EmitTargetArchBuiltinExpr(
2515*67e74705SXin Li this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2516*67e74705SXin Li getContext().getAuxTargetInfo()->getTriple().getArch());
2517*67e74705SXin Li }
2518*67e74705SXin Li
2519*67e74705SXin Li return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2520*67e74705SXin Li getTarget().getTriple().getArch());
2521*67e74705SXin Li }
2522*67e74705SXin Li
GetNeonType(CodeGenFunction * CGF,NeonTypeFlags TypeFlags,bool V1Ty=false)2523*67e74705SXin Li static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2524*67e74705SXin Li NeonTypeFlags TypeFlags,
2525*67e74705SXin Li bool V1Ty=false) {
2526*67e74705SXin Li int IsQuad = TypeFlags.isQuad();
2527*67e74705SXin Li switch (TypeFlags.getEltType()) {
2528*67e74705SXin Li case NeonTypeFlags::Int8:
2529*67e74705SXin Li case NeonTypeFlags::Poly8:
2530*67e74705SXin Li return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2531*67e74705SXin Li case NeonTypeFlags::Int16:
2532*67e74705SXin Li case NeonTypeFlags::Poly16:
2533*67e74705SXin Li case NeonTypeFlags::Float16:
2534*67e74705SXin Li return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2535*67e74705SXin Li case NeonTypeFlags::Int32:
2536*67e74705SXin Li return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2537*67e74705SXin Li case NeonTypeFlags::Int64:
2538*67e74705SXin Li case NeonTypeFlags::Poly64:
2539*67e74705SXin Li return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2540*67e74705SXin Li case NeonTypeFlags::Poly128:
2541*67e74705SXin Li // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2542*67e74705SXin Li // There is a lot of i128 and f128 API missing.
2543*67e74705SXin Li // so we use v16i8 to represent poly128 and get pattern matched.
2544*67e74705SXin Li return llvm::VectorType::get(CGF->Int8Ty, 16);
2545*67e74705SXin Li case NeonTypeFlags::Float32:
2546*67e74705SXin Li return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2547*67e74705SXin Li case NeonTypeFlags::Float64:
2548*67e74705SXin Li return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2549*67e74705SXin Li }
2550*67e74705SXin Li llvm_unreachable("Unknown vector element type!");
2551*67e74705SXin Li }
2552*67e74705SXin Li
GetFloatNeonType(CodeGenFunction * CGF,NeonTypeFlags IntTypeFlags)2553*67e74705SXin Li static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2554*67e74705SXin Li NeonTypeFlags IntTypeFlags) {
2555*67e74705SXin Li int IsQuad = IntTypeFlags.isQuad();
2556*67e74705SXin Li switch (IntTypeFlags.getEltType()) {
2557*67e74705SXin Li case NeonTypeFlags::Int32:
2558*67e74705SXin Li return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2559*67e74705SXin Li case NeonTypeFlags::Int64:
2560*67e74705SXin Li return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2561*67e74705SXin Li default:
2562*67e74705SXin Li llvm_unreachable("Type can't be converted to floating-point!");
2563*67e74705SXin Li }
2564*67e74705SXin Li }
2565*67e74705SXin Li
EmitNeonSplat(Value * V,Constant * C)2566*67e74705SXin Li Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2567*67e74705SXin Li unsigned nElts = V->getType()->getVectorNumElements();
2568*67e74705SXin Li Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2569*67e74705SXin Li return Builder.CreateShuffleVector(V, V, SV, "lane");
2570*67e74705SXin Li }
2571*67e74705SXin Li
EmitNeonCall(Function * F,SmallVectorImpl<Value * > & Ops,const char * name,unsigned shift,bool rightshift)2572*67e74705SXin Li Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2573*67e74705SXin Li const char *name,
2574*67e74705SXin Li unsigned shift, bool rightshift) {
2575*67e74705SXin Li unsigned j = 0;
2576*67e74705SXin Li for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2577*67e74705SXin Li ai != ae; ++ai, ++j)
2578*67e74705SXin Li if (shift > 0 && shift == j)
2579*67e74705SXin Li Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2580*67e74705SXin Li else
2581*67e74705SXin Li Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2582*67e74705SXin Li
2583*67e74705SXin Li return Builder.CreateCall(F, Ops, name);
2584*67e74705SXin Li }
2585*67e74705SXin Li
EmitNeonShiftVector(Value * V,llvm::Type * Ty,bool neg)2586*67e74705SXin Li Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2587*67e74705SXin Li bool neg) {
2588*67e74705SXin Li int SV = cast<ConstantInt>(V)->getSExtValue();
2589*67e74705SXin Li return ConstantInt::get(Ty, neg ? -SV : SV);
2590*67e74705SXin Li }
2591*67e74705SXin Li
2592*67e74705SXin Li // \brief Right-shift a vector by a constant.
EmitNeonRShiftImm(Value * Vec,Value * Shift,llvm::Type * Ty,bool usgn,const char * name)2593*67e74705SXin Li Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2594*67e74705SXin Li llvm::Type *Ty, bool usgn,
2595*67e74705SXin Li const char *name) {
2596*67e74705SXin Li llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2597*67e74705SXin Li
2598*67e74705SXin Li int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2599*67e74705SXin Li int EltSize = VTy->getScalarSizeInBits();
2600*67e74705SXin Li
2601*67e74705SXin Li Vec = Builder.CreateBitCast(Vec, Ty);
2602*67e74705SXin Li
2603*67e74705SXin Li // lshr/ashr are undefined when the shift amount is equal to the vector
2604*67e74705SXin Li // element size.
2605*67e74705SXin Li if (ShiftAmt == EltSize) {
2606*67e74705SXin Li if (usgn) {
2607*67e74705SXin Li // Right-shifting an unsigned value by its size yields 0.
2608*67e74705SXin Li return llvm::ConstantAggregateZero::get(VTy);
2609*67e74705SXin Li } else {
2610*67e74705SXin Li // Right-shifting a signed value by its size is equivalent
2611*67e74705SXin Li // to a shift of size-1.
2612*67e74705SXin Li --ShiftAmt;
2613*67e74705SXin Li Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2614*67e74705SXin Li }
2615*67e74705SXin Li }
2616*67e74705SXin Li
2617*67e74705SXin Li Shift = EmitNeonShiftVector(Shift, Ty, false);
2618*67e74705SXin Li if (usgn)
2619*67e74705SXin Li return Builder.CreateLShr(Vec, Shift, name);
2620*67e74705SXin Li else
2621*67e74705SXin Li return Builder.CreateAShr(Vec, Shift, name);
2622*67e74705SXin Li }
2623*67e74705SXin Li
// Bit flags that are OR-ed together to form the TypeModifier value of a
// NEON intrinsic table entry (see the NEONMAP1/NEONMAP2 uses in the tables
// that follow). The first group are single-bit flags; the second group are
// named combinations of them. How each flag is interpreted is decided by the
// code that consumes the tables, not here.
enum {
  // Single-bit flags.
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  // Named combinations of the flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
2645*67e74705SXin Li
namespace {
/// One row of a NEON builtin-to-intrinsic mapping table.
///
/// Rows are ordered by BuiltinID only; both comparison operators ignore every
/// other field.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name (see NEONMAP*).
  unsigned BuiltinID;        // NEON builtin this row describes.
  unsigned LLVMIntrinsic;    // Primary intrinsic ID; 0 when none is given.
  unsigned AltLLVMIntrinsic; // Alternate intrinsic ID; 0 when none is given.
  unsigned TypeModifier;     // OR-ed type-modifier flags; 0 when none.

  /// Compare this row's builtin ID against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  /// Compare two rows by builtin ID.
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace
2662*67e74705SXin Li
// Initializer helpers for NeonIntrinsicInfo table rows. Each expands to a
// braced initializer in field order:
//   { NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier }
// where NameHint is the stringified NameBase and BuiltinID is
// NEON::BI__builtin_neon_<NameBase>.

// Row with no intrinsic and no type modifier (all three set to 0).
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// Row with one intrinsic (Intrinsic::<LLVMIntrinsic>) and a type modifier.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }

// Row with a primary and an alternate intrinsic plus a type modifier.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      TypeModifier }
2674*67e74705SXin Li
2675*67e74705SXin Li static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2676*67e74705SXin Li NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2677*67e74705SXin Li NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2678*67e74705SXin Li NEONMAP1(vabs_v, arm_neon_vabs, 0),
2679*67e74705SXin Li NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2680*67e74705SXin Li NEONMAP0(vaddhn_v),
2681*67e74705SXin Li NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2682*67e74705SXin Li NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2683*67e74705SXin Li NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2684*67e74705SXin Li NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2685*67e74705SXin Li NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2686*67e74705SXin Li NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2687*67e74705SXin Li NEONMAP1(vcage_v, arm_neon_vacge, 0),
2688*67e74705SXin Li NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2689*67e74705SXin Li NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2690*67e74705SXin Li NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2691*67e74705SXin Li NEONMAP1(vcale_v, arm_neon_vacge, 0),
2692*67e74705SXin Li NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2693*67e74705SXin Li NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2694*67e74705SXin Li NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2695*67e74705SXin Li NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2696*67e74705SXin Li NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2697*67e74705SXin Li NEONMAP1(vclz_v, ctlz, Add1ArgType),
2698*67e74705SXin Li NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2699*67e74705SXin Li NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2700*67e74705SXin Li NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2701*67e74705SXin Li NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2702*67e74705SXin Li NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2703*67e74705SXin Li NEONMAP0(vcvt_f32_v),
2704*67e74705SXin Li NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2705*67e74705SXin Li NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2706*67e74705SXin Li NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2707*67e74705SXin Li NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2708*67e74705SXin Li NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2709*67e74705SXin Li NEONMAP0(vcvt_s32_v),
2710*67e74705SXin Li NEONMAP0(vcvt_s64_v),
2711*67e74705SXin Li NEONMAP0(vcvt_u32_v),
2712*67e74705SXin Li NEONMAP0(vcvt_u64_v),
2713*67e74705SXin Li NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2714*67e74705SXin Li NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2715*67e74705SXin Li NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2716*67e74705SXin Li NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2717*67e74705SXin Li NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2718*67e74705SXin Li NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2719*67e74705SXin Li NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2720*67e74705SXin Li NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2721*67e74705SXin Li NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2722*67e74705SXin Li NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2723*67e74705SXin Li NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2724*67e74705SXin Li NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2725*67e74705SXin Li NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2726*67e74705SXin Li NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2727*67e74705SXin Li NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2728*67e74705SXin Li NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2729*67e74705SXin Li NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2730*67e74705SXin Li NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2731*67e74705SXin Li NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2732*67e74705SXin Li NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2733*67e74705SXin Li NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2734*67e74705SXin Li NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2735*67e74705SXin Li NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2736*67e74705SXin Li NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2737*67e74705SXin Li NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2738*67e74705SXin Li NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2739*67e74705SXin Li NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2740*67e74705SXin Li NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2741*67e74705SXin Li NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2742*67e74705SXin Li NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2743*67e74705SXin Li NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2744*67e74705SXin Li NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2745*67e74705SXin Li NEONMAP0(vcvtq_f32_v),
2746*67e74705SXin Li NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2747*67e74705SXin Li NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2748*67e74705SXin Li NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2749*67e74705SXin Li NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2750*67e74705SXin Li NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2751*67e74705SXin Li NEONMAP0(vcvtq_s32_v),
2752*67e74705SXin Li NEONMAP0(vcvtq_s64_v),
2753*67e74705SXin Li NEONMAP0(vcvtq_u32_v),
2754*67e74705SXin Li NEONMAP0(vcvtq_u64_v),
2755*67e74705SXin Li NEONMAP0(vext_v),
2756*67e74705SXin Li NEONMAP0(vextq_v),
2757*67e74705SXin Li NEONMAP0(vfma_v),
2758*67e74705SXin Li NEONMAP0(vfmaq_v),
2759*67e74705SXin Li NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2760*67e74705SXin Li NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2761*67e74705SXin Li NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2762*67e74705SXin Li NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2763*67e74705SXin Li NEONMAP0(vld1_dup_v),
2764*67e74705SXin Li NEONMAP1(vld1_v, arm_neon_vld1, 0),
2765*67e74705SXin Li NEONMAP0(vld1q_dup_v),
2766*67e74705SXin Li NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2767*67e74705SXin Li NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2768*67e74705SXin Li NEONMAP1(vld2_v, arm_neon_vld2, 0),
2769*67e74705SXin Li NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2770*67e74705SXin Li NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2771*67e74705SXin Li NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2772*67e74705SXin Li NEONMAP1(vld3_v, arm_neon_vld3, 0),
2773*67e74705SXin Li NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2774*67e74705SXin Li NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2775*67e74705SXin Li NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2776*67e74705SXin Li NEONMAP1(vld4_v, arm_neon_vld4, 0),
2777*67e74705SXin Li NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2778*67e74705SXin Li NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2779*67e74705SXin Li NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2780*67e74705SXin Li NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2781*67e74705SXin Li NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2782*67e74705SXin Li NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2783*67e74705SXin Li NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2784*67e74705SXin Li NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2785*67e74705SXin Li NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2786*67e74705SXin Li NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2787*67e74705SXin Li NEONMAP0(vmovl_v),
2788*67e74705SXin Li NEONMAP0(vmovn_v),
2789*67e74705SXin Li NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2790*67e74705SXin Li NEONMAP0(vmull_v),
2791*67e74705SXin Li NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2792*67e74705SXin Li NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2793*67e74705SXin Li NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2794*67e74705SXin Li NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2795*67e74705SXin Li NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2796*67e74705SXin Li NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2797*67e74705SXin Li NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2798*67e74705SXin Li NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2799*67e74705SXin Li NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2800*67e74705SXin Li NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2801*67e74705SXin Li NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2802*67e74705SXin Li NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2803*67e74705SXin Li NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2804*67e74705SXin Li NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2805*67e74705SXin Li NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2806*67e74705SXin Li NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2807*67e74705SXin Li NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2808*67e74705SXin Li NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2809*67e74705SXin Li NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2810*67e74705SXin Li NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2811*67e74705SXin Li NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2812*67e74705SXin Li NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2813*67e74705SXin Li NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2814*67e74705SXin Li NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2815*67e74705SXin Li NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2816*67e74705SXin Li NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2817*67e74705SXin Li NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2818*67e74705SXin Li NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2819*67e74705SXin Li NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2820*67e74705SXin Li NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2821*67e74705SXin Li NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2822*67e74705SXin Li NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2823*67e74705SXin Li NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2824*67e74705SXin Li NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2825*67e74705SXin Li NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2826*67e74705SXin Li NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2827*67e74705SXin Li NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2828*67e74705SXin Li NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2829*67e74705SXin Li NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2830*67e74705SXin Li NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2831*67e74705SXin Li NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2832*67e74705SXin Li NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2833*67e74705SXin Li NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2834*67e74705SXin Li NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2835*67e74705SXin Li NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2836*67e74705SXin Li NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2837*67e74705SXin Li NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2838*67e74705SXin Li NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2839*67e74705SXin Li NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2840*67e74705SXin Li NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2841*67e74705SXin Li NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2842*67e74705SXin Li NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2843*67e74705SXin Li NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2844*67e74705SXin Li NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2845*67e74705SXin Li NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2846*67e74705SXin Li NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2847*67e74705SXin Li NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2848*67e74705SXin Li NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2849*67e74705SXin Li NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2850*67e74705SXin Li NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2851*67e74705SXin Li NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2852*67e74705SXin Li NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2853*67e74705SXin Li NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2854*67e74705SXin Li NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2855*67e74705SXin Li NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2856*67e74705SXin Li NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2857*67e74705SXin Li NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2858*67e74705SXin Li NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2859*67e74705SXin Li NEONMAP0(vshl_n_v),
2860*67e74705SXin Li NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2861*67e74705SXin Li NEONMAP0(vshll_n_v),
2862*67e74705SXin Li NEONMAP0(vshlq_n_v),
2863*67e74705SXin Li NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2864*67e74705SXin Li NEONMAP0(vshr_n_v),
2865*67e74705SXin Li NEONMAP0(vshrn_n_v),
2866*67e74705SXin Li NEONMAP0(vshrq_n_v),
2867*67e74705SXin Li NEONMAP1(vst1_v, arm_neon_vst1, 0),
2868*67e74705SXin Li NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2869*67e74705SXin Li NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2870*67e74705SXin Li NEONMAP1(vst2_v, arm_neon_vst2, 0),
2871*67e74705SXin Li NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2872*67e74705SXin Li NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2873*67e74705SXin Li NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2874*67e74705SXin Li NEONMAP1(vst3_v, arm_neon_vst3, 0),
2875*67e74705SXin Li NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2876*67e74705SXin Li NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2877*67e74705SXin Li NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2878*67e74705SXin Li NEONMAP1(vst4_v, arm_neon_vst4, 0),
2879*67e74705SXin Li NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2880*67e74705SXin Li NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2881*67e74705SXin Li NEONMAP0(vsubhn_v),
2882*67e74705SXin Li NEONMAP0(vtrn_v),
2883*67e74705SXin Li NEONMAP0(vtrnq_v),
2884*67e74705SXin Li NEONMAP0(vtst_v),
2885*67e74705SXin Li NEONMAP0(vtstq_v),
2886*67e74705SXin Li NEONMAP0(vuzp_v),
2887*67e74705SXin Li NEONMAP0(vuzpq_v),
2888*67e74705SXin Li NEONMAP0(vzip_v),
2889*67e74705SXin Li NEONMAP0(vzipq_v)
2890*67e74705SXin Li };
2891*67e74705SXin Li
// AArch64SIMDIntrinsicMap - table of NeonIntrinsicInfo records, one per NEON
// builtin, giving the aarch64_neon_* / aarch64_crypto_* (or, for vclz/vcnt,
// ctlz/ctpop) intrinsic name(s) associated with that builtin.
//
// Each record is produced by a NEONMAPn macro (defined outside this excerpt):
//   NEONMAP0(builtin)                          - no intrinsic listed
//   NEONMAP1(builtin, intrinsic, flags)        - one intrinsic
//   NEONMAP2(builtin, intrinsic, alt, flags)   - two intrinsics
// where `flags` is 0 or an OR of Add1ArgType, AddRetType and UnsignedAlts.
//
// Records are listed in ascending order of builtin name.
// NOTE(review): the lookup code presumably depends on this ordering -- confirm
// before adding or renaming a record, and keep new records in sorted position.
2892*67e74705SXin Li static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2893*67e74705SXin Li NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2894*67e74705SXin Li NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2895*67e74705SXin Li NEONMAP0(vaddhn_v),
2896*67e74705SXin Li NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2897*67e74705SXin Li NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2898*67e74705SXin Li NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2899*67e74705SXin Li NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2900*67e74705SXin Li NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2901*67e74705SXin Li NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2902*67e74705SXin Li NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2903*67e74705SXin Li NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
// vcale/vcalt are mapped to the same facge/facgt intrinsics as vcage/vcagt.
// NOTE(review): presumably the operands are swapped where the call is
// emitted -- confirm against the code that consumes this table.
2904*67e74705SXin Li NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2905*67e74705SXin Li NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2906*67e74705SXin Li NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2907*67e74705SXin Li NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2908*67e74705SXin Li NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2909*67e74705SXin Li NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
// vclz/vcnt use ctlz/ctpop, the only intrinsic names in this table that carry
// no aarch64_ prefix.
2910*67e74705SXin Li NEONMAP1(vclz_v, ctlz, Add1ArgType),
2911*67e74705SXin Li NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2912*67e74705SXin Li NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2913*67e74705SXin Li NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2914*67e74705SXin Li NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2915*67e74705SXin Li NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2916*67e74705SXin Li NEONMAP0(vcvt_f32_v),
2917*67e74705SXin Li NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2918*67e74705SXin Li NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2919*67e74705SXin Li NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2920*67e74705SXin Li NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2921*67e74705SXin Li NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2922*67e74705SXin Li NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2923*67e74705SXin Li NEONMAP0(vcvtq_f32_v),
2924*67e74705SXin Li NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2925*67e74705SXin Li NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2926*67e74705SXin Li NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2927*67e74705SXin Li NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2928*67e74705SXin Li NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2929*67e74705SXin Li NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2930*67e74705SXin Li NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2931*67e74705SXin Li NEONMAP0(vext_v),
2932*67e74705SXin Li NEONMAP0(vextq_v),
2933*67e74705SXin Li NEONMAP0(vfma_v),
2934*67e74705SXin Li NEONMAP0(vfmaq_v),
// In every NEONMAP2 record flagged UnsignedAlts in this table, the u-prefixed
// intrinsic is listed first and its s-prefixed counterpart second.
2935*67e74705SXin Li NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2936*67e74705SXin Li NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2937*67e74705SXin Li NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2938*67e74705SXin Li NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2939*67e74705SXin Li NEONMAP0(vmovl_v),
2940*67e74705SXin Li NEONMAP0(vmovn_v),
2941*67e74705SXin Li NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2942*67e74705SXin Li NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2943*67e74705SXin Li NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2944*67e74705SXin Li NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2945*67e74705SXin Li NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2946*67e74705SXin Li NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2947*67e74705SXin Li NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2948*67e74705SXin Li NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2949*67e74705SXin Li NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2950*67e74705SXin Li NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
// vqdmlal/vqdmlsl list two different operations (sqdmull plus sqadd/sqsub)
// rather than an unsigned/signed pair.
// NOTE(review): presumably the consumer emits both in sequence -- confirm.
2951*67e74705SXin Li NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2952*67e74705SXin Li NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2953*67e74705SXin Li NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2954*67e74705SXin Li NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2955*67e74705SXin Li NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2956*67e74705SXin Li NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2957*67e74705SXin Li NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2958*67e74705SXin Li NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2959*67e74705SXin Li NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2960*67e74705SXin Li NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2961*67e74705SXin Li NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2962*67e74705SXin Li NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2963*67e74705SXin Li NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2964*67e74705SXin Li NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2965*67e74705SXin Li NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2966*67e74705SXin Li NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
2967*67e74705SXin Li NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2968*67e74705SXin Li NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2969*67e74705SXin Li NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2970*67e74705SXin Li NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2971*67e74705SXin Li NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2972*67e74705SXin Li NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
// vrecpe (and vrsqrte further down) pair an f-prefixed intrinsic with a
// u-prefixed one and set no flags.
// NOTE(review): presumably the choice is made on the element type -- confirm.
2973*67e74705SXin Li NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2974*67e74705SXin Li NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2975*67e74705SXin Li NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2976*67e74705SXin Li NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2977*67e74705SXin Li NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2978*67e74705SXin Li NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2979*67e74705SXin Li NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2980*67e74705SXin Li NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
// The vrshr_n (shift-right) builtins are mapped to the same urshl/srshl
// intrinsics as vrshl.
// NOTE(review): presumably the shift amount is negated by the consumer --
// confirm.
2981*67e74705SXin Li NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2982*67e74705SXin Li NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2983*67e74705SXin Li NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2984*67e74705SXin Li NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2985*67e74705SXin Li NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2986*67e74705SXin Li NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2987*67e74705SXin Li NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2988*67e74705SXin Li NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2989*67e74705SXin Li NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2990*67e74705SXin Li NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2991*67e74705SXin Li NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2992*67e74705SXin Li NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2993*67e74705SXin Li NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2994*67e74705SXin Li NEONMAP0(vshl_n_v),
2995*67e74705SXin Li NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2996*67e74705SXin Li NEONMAP0(vshll_n_v),
2997*67e74705SXin Li NEONMAP0(vshlq_n_v),
2998*67e74705SXin Li NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2999*67e74705SXin Li NEONMAP0(vshr_n_v),
3000*67e74705SXin Li NEONMAP0(vshrn_n_v),
3001*67e74705SXin Li NEONMAP0(vshrq_n_v),
3002*67e74705SXin Li NEONMAP0(vsubhn_v),
3003*67e74705SXin Li NEONMAP0(vtst_v),
3004*67e74705SXin Li NEONMAP0(vtstq_v),
3005*67e74705SXin Li };
3006*67e74705SXin Li
3007*67e74705SXin Li static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3008*67e74705SXin Li NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3009*67e74705SXin Li NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3010*67e74705SXin Li NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3011*67e74705SXin Li NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3012*67e74705SXin Li NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3013*67e74705SXin Li NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3014*67e74705SXin Li NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3015*67e74705SXin Li NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3016*67e74705SXin Li NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3017*67e74705SXin Li NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3018*67e74705SXin Li NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3019*67e74705SXin Li NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3020*67e74705SXin Li NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3021*67e74705SXin Li NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3022*67e74705SXin Li NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3023*67e74705SXin Li NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3024*67e74705SXin Li NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3025*67e74705SXin Li NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3026*67e74705SXin Li NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3027*67e74705SXin Li NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3028*67e74705SXin Li NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3029*67e74705SXin Li NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3030*67e74705SXin Li NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3031*67e74705SXin Li NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3032*67e74705SXin Li NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3033*67e74705SXin Li NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3034*67e74705SXin Li NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3035*67e74705SXin Li NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3036*67e74705SXin Li NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3037*67e74705SXin Li NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3038*67e74705SXin Li NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3039*67e74705SXin Li NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3040*67e74705SXin Li NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3041*67e74705SXin Li NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3042*67e74705SXin Li NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3043*67e74705SXin Li NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3044*67e74705SXin Li NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3045*67e74705SXin Li NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3046*67e74705SXin Li NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3047*67e74705SXin Li NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3048*67e74705SXin Li NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3049*67e74705SXin Li NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3050*67e74705SXin Li NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3051*67e74705SXin Li NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3052*67e74705SXin Li NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3053*67e74705SXin Li NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3054*67e74705SXin Li NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3055*67e74705SXin Li NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3056*67e74705SXin Li NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3057*67e74705SXin Li NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3058*67e74705SXin Li NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3059*67e74705SXin Li NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3060*67e74705SXin Li NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3061*67e74705SXin Li NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3062*67e74705SXin Li NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3063*67e74705SXin Li NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3064*67e74705SXin Li NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3065*67e74705SXin Li NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3066*67e74705SXin Li NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3067*67e74705SXin Li NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3068*67e74705SXin Li NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3069*67e74705SXin Li NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3070*67e74705SXin Li NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3071*67e74705SXin Li NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3072*67e74705SXin Li NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3073*67e74705SXin Li NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3074*67e74705SXin Li NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3075*67e74705SXin Li NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3076*67e74705SXin Li NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3077*67e74705SXin Li NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3078*67e74705SXin Li NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3079*67e74705SXin Li NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3080*67e74705SXin Li NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3081*67e74705SXin Li NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3082*67e74705SXin Li NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3083*67e74705SXin Li NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3084*67e74705SXin Li NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3085*67e74705SXin Li NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3086*67e74705SXin Li NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3087*67e74705SXin Li NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3088*67e74705SXin Li NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3089*67e74705SXin Li NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3090*67e74705SXin Li NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3091*67e74705SXin Li NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3092*67e74705SXin Li NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3093*67e74705SXin Li NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3094*67e74705SXin Li NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3095*67e74705SXin Li NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3096*67e74705SXin Li NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3097*67e74705SXin Li NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3098*67e74705SXin Li NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3099*67e74705SXin Li NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3100*67e74705SXin Li NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3101*67e74705SXin Li NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3102*67e74705SXin Li NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3103*67e74705SXin Li NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3104*67e74705SXin Li NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3105*67e74705SXin Li NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3106*67e74705SXin Li NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3107*67e74705SXin Li NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3108*67e74705SXin Li NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3109*67e74705SXin Li NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3110*67e74705SXin Li NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3111*67e74705SXin Li NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3112*67e74705SXin Li NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3113*67e74705SXin Li NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3114*67e74705SXin Li NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3115*67e74705SXin Li NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3116*67e74705SXin Li NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3117*67e74705SXin Li NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3118*67e74705SXin Li NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3119*67e74705SXin Li NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3120*67e74705SXin Li NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3121*67e74705SXin Li NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3122*67e74705SXin Li NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3123*67e74705SXin Li NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3124*67e74705SXin Li NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3125*67e74705SXin Li NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3126*67e74705SXin Li NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3127*67e74705SXin Li NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3128*67e74705SXin Li NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3129*67e74705SXin Li NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3130*67e74705SXin Li NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3131*67e74705SXin Li NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3132*67e74705SXin Li NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3133*67e74705SXin Li NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3134*67e74705SXin Li NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3135*67e74705SXin Li NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3136*67e74705SXin Li NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3137*67e74705SXin Li NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3138*67e74705SXin Li NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3139*67e74705SXin Li NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3140*67e74705SXin Li NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3141*67e74705SXin Li NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3142*67e74705SXin Li NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3143*67e74705SXin Li NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3144*67e74705SXin Li NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3145*67e74705SXin Li NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3146*67e74705SXin Li NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3147*67e74705SXin Li NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3148*67e74705SXin Li NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3149*67e74705SXin Li NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3150*67e74705SXin Li NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3151*67e74705SXin Li NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3152*67e74705SXin Li NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3153*67e74705SXin Li NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3154*67e74705SXin Li NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3155*67e74705SXin Li NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3156*67e74705SXin Li NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3157*67e74705SXin Li NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3158*67e74705SXin Li NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3159*67e74705SXin Li NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3160*67e74705SXin Li NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3161*67e74705SXin Li NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3162*67e74705SXin Li NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3163*67e74705SXin Li NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3164*67e74705SXin Li NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3165*67e74705SXin Li NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3166*67e74705SXin Li NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3167*67e74705SXin Li NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3168*67e74705SXin Li NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3169*67e74705SXin Li NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3170*67e74705SXin Li NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3171*67e74705SXin Li NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3172*67e74705SXin Li NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3173*67e74705SXin Li NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3174*67e74705SXin Li NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3175*67e74705SXin Li NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3176*67e74705SXin Li NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3177*67e74705SXin Li NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3178*67e74705SXin Li NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3179*67e74705SXin Li NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3180*67e74705SXin Li NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3181*67e74705SXin Li NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3182*67e74705SXin Li NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3183*67e74705SXin Li NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3184*67e74705SXin Li NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3185*67e74705SXin Li NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3186*67e74705SXin Li NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3187*67e74705SXin Li NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3188*67e74705SXin Li NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3189*67e74705SXin Li NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3190*67e74705SXin Li NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3191*67e74705SXin Li NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3192*67e74705SXin Li NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3193*67e74705SXin Li NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3194*67e74705SXin Li NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3195*67e74705SXin Li NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3196*67e74705SXin Li NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3197*67e74705SXin Li NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3198*67e74705SXin Li NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3199*67e74705SXin Li NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3200*67e74705SXin Li };
3201*67e74705SXin Li
// The NEONMAP* helper macros are no longer needed past this point. NEONMAP1
// is the one used by the table entries that end just above; NEONMAP0 and
// NEONMAP2 are presumably used by entries outside this view.
3202*67e74705SXin Li #undef NEONMAP0
3203*67e74705SXin Li #undef NEONMAP1
3204*67e74705SXin Li #undef NEONMAP2
3205*67e74705SXin Li
// File-local flags, all starting out false. NOTE(review): judging by their
// names these are the per-table "MapProvenSorted" arguments handed to
// findNeonIntrinsicInMap; the call sites are not in this part of the file, so
// confirm against the callers.
3206*67e74705SXin Li static bool NEONSIMDIntrinsicsProvenSorted = false;
3207*67e74705SXin Li
3208*67e74705SXin Li static bool AArch64SIMDIntrinsicsProvenSorted = false;
3209*67e74705SXin Li static bool AArch64SISDIntrinsicsProvenSorted = false;
3210*67e74705SXin Li
3211*67e74705SXin Li
/// findNeonIntrinsicInMap - Search IntrinsicMap for the entry describing
/// BuiltinID.
///
/// IntrinsicMap is searched with std::lower_bound, so it has to be ordered
/// consistently with the NeonIntrinsicInfo-vs-unsigned comparison (that
/// comparison is declared outside this block).
///
/// MapProvenSorted is a caller-owned flag. When NDEBUG is not defined and the
/// flag is still false, the table is asserted to be sorted and the flag is set
/// to true so the check is not repeated for that flag. When NDEBUG is defined
/// the flag is neither read nor written.
///
/// Returns a pointer to the entry whose BuiltinID field equals BuiltinID, or
/// nullptr if the table has no such entry.
3212*67e74705SXin Li static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,unsigned BuiltinID,bool & MapProvenSorted)3213*67e74705SXin Li findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3214*67e74705SXin Li unsigned BuiltinID, bool &MapProvenSorted) {
3215*67e74705SXin Li
// Assertion-enabled builds only: verify the ordering once per flag.
3216*67e74705SXin Li #ifndef NDEBUG
3217*67e74705SXin Li if (!MapProvenSorted) {
3218*67e74705SXin Li assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3219*67e74705SXin Li MapProvenSorted = true;
3220*67e74705SXin Li }
3221*67e74705SXin Li #endif
3222*67e74705SXin Li
// lower_bound yields the first entry that does not compare less than
// BuiltinID, or end() if every entry compares less.
3223*67e74705SXin Li const NeonIntrinsicInfo *Builtin =
3224*67e74705SXin Li std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3225*67e74705SXin Li
// That entry is only a hit when its ID matches exactly.
3226*67e74705SXin Li if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3227*67e74705SXin Li return Builtin;
3228*67e74705SXin Li
3229*67e74705SXin Li return nullptr;
3230*67e74705SXin Li }
3231*67e74705SXin Li
/// LookupNeonLLVMIntrinsic - Build the list of overload types selected by the
/// bits of Modifier and return CGM.getIntrinsic(IntrinsicID, Tys) for it.
///
/// The type list is assembled in this order:
///   1. the converted return type of call E, if AddRetType is set (wrapped in
///      a vector when VectorizeRetType is also set);
///   2. ArgType, once if any bit of Add1ArgType | Add2ArgTypes is set and a
///      second time if any bit of Add2ArgTypes is set (ArgType is first
///      wrapped in a vector when VectorizeArgTypes is set);
///   3. FloatTy, if InventFloatType is set.
///
/// When a scalar type is wrapped in a vector, the element count is
/// VectorSize / <scalar size in bits>, where VectorSize is 64 for
/// Use64BitVectors and 128 for Use128BitVectors; with neither bit set a
/// one-element vector is used.
3232*67e74705SXin Li Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3233*67e74705SXin Li unsigned Modifier,
3234*67e74705SXin Li llvm::Type *ArgType,
3235*67e74705SXin Li const CallExpr *E) {
// 0 means "no vector width requested"; Use64BitVectors is tested first and
// therefore wins if both width bits are set.
3236*67e74705SXin Li int VectorSize = 0;
3237*67e74705SXin Li if (Modifier & Use64BitVectors)
3238*67e74705SXin Li VectorSize = 64;
3239*67e74705SXin Li else if (Modifier & Use128BitVectors)
3240*67e74705SXin Li VectorSize = 128;
3241*67e74705SXin Li
3242*67e74705SXin Li // Return type.
3243*67e74705SXin Li SmallVector<llvm::Type *, 3> Tys;
3244*67e74705SXin Li if (Modifier & AddRetType) {
3245*67e74705SXin Li llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3246*67e74705SXin Li if (Modifier & VectorizeRetType)
3247*67e74705SXin Li Ty = llvm::VectorType::get(
3248*67e74705SXin Li Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3249*67e74705SXin Li
3250*67e74705SXin Li Tys.push_back(Ty);
3251*67e74705SXin Li }
3252*67e74705SXin Li
3253*67e74705SXin Li // Arguments.
3254*67e74705SXin Li if (Modifier & VectorizeArgTypes) {
3255*67e74705SXin Li int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3256*67e74705SXin Li ArgType = llvm::VectorType::get(ArgType, Elts);
3257*67e74705SXin Li }
3258*67e74705SXin Li
// The two tests below overlap on purpose: Add2ArgTypes contributes ArgType
// here and once more in the following test.
3259*67e74705SXin Li if (Modifier & (Add1ArgType | Add2ArgTypes))
3260*67e74705SXin Li Tys.push_back(ArgType);
3261*67e74705SXin Li
3262*67e74705SXin Li if (Modifier & Add2ArgTypes)
3263*67e74705SXin Li Tys.push_back(ArgType);
3264*67e74705SXin Li
3265*67e74705SXin Li if (Modifier & InventFloatType)
3266*67e74705SXin Li Tys.push_back(FloatTy);
3267*67e74705SXin Li
3268*67e74705SXin Li return CGM.getIntrinsic(IntrinsicID, Tys);
3269*67e74705SXin Li }
3270*67e74705SXin Li
/// EmitCommonNeonSISDBuiltinExpr - Emit the call for a builtin described by
/// the table entry SISDInfo.
///
/// Steps, in order:
///   - for the "le"/"lt" style comparisons listed in the switch, swap Ops[0]
///     and Ops[1];
///   - obtain the intrinsic declaration from CGF.LookupNeonLLVMIntrinsic using
///     the entry's LLVMIntrinsic and TypeModifier and the converted type of
///     the call's first argument;
///   - for every formal parameter whose bit width differs from the matching
///     operand, place the scalar operand in lane 0 of an undef vector of the
///     formal's type;
///   - emit the call, then either extract lane 0 (when the expression's
///     converted type is narrower than the call result) or bitcast the result
///     to that type.
///
/// Ops is modified in place. The function asserts that the entry's
/// LLVMIntrinsic is non-zero.
3271*67e74705SXin Li static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3272*67e74705SXin Li const NeonIntrinsicInfo &SISDInfo,
3273*67e74705SXin Li SmallVectorImpl<Value *> &Ops,
3274*67e74705SXin Li const CallExpr *E) {
3275*67e74705SXin Li unsigned BuiltinID = SISDInfo.BuiltinID;
3276*67e74705SXin Li unsigned int Int = SISDInfo.LLVMIntrinsic;
3277*67e74705SXin Li unsigned Modifier = SISDInfo.TypeModifier;
3278*67e74705SXin Li const char *s = SISDInfo.NameHint;
3279*67e74705SXin Li
// No default label: builtins not listed here keep their operand order.
3280*67e74705SXin Li switch (BuiltinID) {
3281*67e74705SXin Li case NEON::BI__builtin_neon_vcled_s64:
3282*67e74705SXin Li case NEON::BI__builtin_neon_vcled_u64:
3283*67e74705SXin Li case NEON::BI__builtin_neon_vcles_f32:
3284*67e74705SXin Li case NEON::BI__builtin_neon_vcled_f64:
3285*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_s64:
3286*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_u64:
3287*67e74705SXin Li case NEON::BI__builtin_neon_vclts_f32:
3288*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_f64:
3289*67e74705SXin Li case NEON::BI__builtin_neon_vcales_f32:
3290*67e74705SXin Li case NEON::BI__builtin_neon_vcaled_f64:
3291*67e74705SXin Li case NEON::BI__builtin_neon_vcalts_f32:
3292*67e74705SXin Li case NEON::BI__builtin_neon_vcaltd_f64:
3293*67e74705SXin Li // Only one direction of comparisons actually exist, cmle is actually a cmge
3294*67e74705SXin Li // with swapped operands. The table gives us the right intrinsic but we
3295*67e74705SXin Li // still need to do the swap.
3296*67e74705SXin Li std::swap(Ops[0], Ops[1]);
3297*67e74705SXin Li break;
3298*67e74705SXin Li }
3299*67e74705SXin Li
3300*67e74705SXin Li assert(Int && "Generic code assumes a valid intrinsic");
3301*67e74705SXin Li
3302*67e74705SXin Li // Determine the type(s) of this overloaded AArch64 intrinsic.
3303*67e74705SXin Li const Expr *Arg = E->getArg(0);
3304*67e74705SXin Li llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3305*67e74705SXin Li Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3306*67e74705SXin Li
// Walk the intrinsic's formal parameters in step with Ops (index j) and fix
// up any operand whose bit width does not match. C0 is the lane index 0 used
// for both the insert below and the extract after the call.
3307*67e74705SXin Li int j = 0;
3308*67e74705SXin Li ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3309*67e74705SXin Li for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3310*67e74705SXin Li ai != ae; ++ai, ++j) {
// Note: this ArgTy is the formal parameter type and shadows the outer ArgTy.
3311*67e74705SXin Li llvm::Type *ArgTy = ai->getType();
3312*67e74705SXin Li if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3313*67e74705SXin Li ArgTy->getPrimitiveSizeInBits())
3314*67e74705SXin Li continue;
3315*67e74705SXin Li
// A width mismatch is only expected for a scalar operand feeding a vector
// parameter.
3316*67e74705SXin Li assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3317*67e74705SXin Li // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3318*67e74705SXin Li // it before inserting.
3319*67e74705SXin Li Ops[j] =
3320*67e74705SXin Li CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3321*67e74705SXin Li Ops[j] =
3322*67e74705SXin Li CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3323*67e74705SXin Li }
3324*67e74705SXin Li
3325*67e74705SXin Li Value *Result = CGF.EmitNeonCall(F, Ops, s);
3326*67e74705SXin Li llvm::Type *ResultType = CGF.ConvertType(E->getType());
// If the call produced something wider than the expression's type, take
// lane 0 of it; otherwise a bitcast to the expected type is enough.
3327*67e74705SXin Li if (ResultType->getPrimitiveSizeInBits() <
3328*67e74705SXin Li Result->getType()->getPrimitiveSizeInBits())
3329*67e74705SXin Li return CGF.Builder.CreateExtractElement(Result, C0);
3330*67e74705SXin Li
3331*67e74705SXin Li return CGF.Builder.CreateBitCast(Result, ResultType, s);
3332*67e74705SXin Li }
3333*67e74705SXin Li
EmitCommonNeonBuiltinExpr(unsigned BuiltinID,unsigned LLVMIntrinsic,unsigned AltLLVMIntrinsic,const char * NameHint,unsigned Modifier,const CallExpr * E,SmallVectorImpl<llvm::Value * > & Ops,Address PtrOp0,Address PtrOp1)3334*67e74705SXin Li Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3335*67e74705SXin Li unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3336*67e74705SXin Li const char *NameHint, unsigned Modifier, const CallExpr *E,
3337*67e74705SXin Li SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3338*67e74705SXin Li // Get the last argument, which specifies the vector type.
3339*67e74705SXin Li llvm::APSInt NeonTypeConst;
3340*67e74705SXin Li const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3341*67e74705SXin Li if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3342*67e74705SXin Li return nullptr;
3343*67e74705SXin Li
3344*67e74705SXin Li // Determine the type of this overloaded NEON intrinsic.
3345*67e74705SXin Li NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3346*67e74705SXin Li bool Usgn = Type.isUnsigned();
3347*67e74705SXin Li bool Quad = Type.isQuad();
3348*67e74705SXin Li
3349*67e74705SXin Li llvm::VectorType *VTy = GetNeonType(this, Type);
3350*67e74705SXin Li llvm::Type *Ty = VTy;
3351*67e74705SXin Li if (!Ty)
3352*67e74705SXin Li return nullptr;
3353*67e74705SXin Li
3354*67e74705SXin Li auto getAlignmentValue32 = [&](Address addr) -> Value* {
3355*67e74705SXin Li return Builder.getInt32(addr.getAlignment().getQuantity());
3356*67e74705SXin Li };
3357*67e74705SXin Li
3358*67e74705SXin Li unsigned Int = LLVMIntrinsic;
3359*67e74705SXin Li if ((Modifier & UnsignedAlts) && !Usgn)
3360*67e74705SXin Li Int = AltLLVMIntrinsic;
3361*67e74705SXin Li
3362*67e74705SXin Li switch (BuiltinID) {
3363*67e74705SXin Li default: break;
3364*67e74705SXin Li case NEON::BI__builtin_neon_vabs_v:
3365*67e74705SXin Li case NEON::BI__builtin_neon_vabsq_v:
3366*67e74705SXin Li if (VTy->getElementType()->isFloatingPointTy())
3367*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3368*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3369*67e74705SXin Li case NEON::BI__builtin_neon_vaddhn_v: {
3370*67e74705SXin Li llvm::VectorType *SrcTy =
3371*67e74705SXin Li llvm::VectorType::getExtendedElementVectorType(VTy);
3372*67e74705SXin Li
3373*67e74705SXin Li // %sum = add <4 x i32> %lhs, %rhs
3374*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3375*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3376*67e74705SXin Li Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3377*67e74705SXin Li
3378*67e74705SXin Li // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3379*67e74705SXin Li Constant *ShiftAmt =
3380*67e74705SXin Li ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3381*67e74705SXin Li Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3382*67e74705SXin Li
3383*67e74705SXin Li // %res = trunc <4 x i32> %high to <4 x i16>
3384*67e74705SXin Li return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3385*67e74705SXin Li }
3386*67e74705SXin Li case NEON::BI__builtin_neon_vcale_v:
3387*67e74705SXin Li case NEON::BI__builtin_neon_vcaleq_v:
3388*67e74705SXin Li case NEON::BI__builtin_neon_vcalt_v:
3389*67e74705SXin Li case NEON::BI__builtin_neon_vcaltq_v:
3390*67e74705SXin Li std::swap(Ops[0], Ops[1]);
3391*67e74705SXin Li case NEON::BI__builtin_neon_vcage_v:
3392*67e74705SXin Li case NEON::BI__builtin_neon_vcageq_v:
3393*67e74705SXin Li case NEON::BI__builtin_neon_vcagt_v:
3394*67e74705SXin Li case NEON::BI__builtin_neon_vcagtq_v: {
3395*67e74705SXin Li llvm::Type *VecFlt = llvm::VectorType::get(
3396*67e74705SXin Li VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3397*67e74705SXin Li VTy->getNumElements());
3398*67e74705SXin Li llvm::Type *Tys[] = { VTy, VecFlt };
3399*67e74705SXin Li Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3400*67e74705SXin Li return EmitNeonCall(F, Ops, NameHint);
3401*67e74705SXin Li }
3402*67e74705SXin Li case NEON::BI__builtin_neon_vclz_v:
3403*67e74705SXin Li case NEON::BI__builtin_neon_vclzq_v:
3404*67e74705SXin Li // We generate target-independent intrinsic, which needs a second argument
3405*67e74705SXin Li // for whether or not clz of zero is undefined; on ARM it isn't.
3406*67e74705SXin Li Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3407*67e74705SXin Li break;
3408*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_f32_v:
3409*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_f32_v:
3410*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3411*67e74705SXin Li Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3412*67e74705SXin Li return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3413*67e74705SXin Li : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3414*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_n_f32_v:
3415*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_n_f64_v:
3416*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3417*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3418*67e74705SXin Li llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3419*67e74705SXin Li Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3420*67e74705SXin Li Function *F = CGM.getIntrinsic(Int, Tys);
3421*67e74705SXin Li return EmitNeonCall(F, Ops, "vcvt_n");
3422*67e74705SXin Li }
3423*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_n_s32_v:
3424*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_n_u32_v:
3425*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_n_s64_v:
3426*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_n_u64_v:
3427*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3428*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3429*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3430*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3431*67e74705SXin Li llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3432*67e74705SXin Li Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3433*67e74705SXin Li return EmitNeonCall(F, Ops, "vcvt_n");
3434*67e74705SXin Li }
3435*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_s32_v:
3436*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_u32_v:
3437*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_s64_v:
3438*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_u64_v:
3439*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_s32_v:
3440*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_u32_v:
3441*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_s64_v:
3442*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_u64_v: {
3443*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3444*67e74705SXin Li return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3445*67e74705SXin Li : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3446*67e74705SXin Li }
3447*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_s32_v:
3448*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_s64_v:
3449*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_u32_v:
3450*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_u64_v:
3451*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_s32_v:
3452*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_s64_v:
3453*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_u32_v:
3454*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_u64_v:
3455*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_s32_v:
3456*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_s64_v:
3457*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_u32_v:
3458*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_u64_v:
3459*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_s32_v:
3460*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_s64_v:
3461*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_u32_v:
3462*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_u64_v:
3463*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_s32_v:
3464*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_s64_v:
3465*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_u32_v:
3466*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_u64_v:
3467*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_s32_v:
3468*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_s64_v:
3469*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_u32_v:
3470*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_u64_v:
3471*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_s32_v:
3472*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_s64_v:
3473*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_u32_v:
3474*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_u64_v:
3475*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_s32_v:
3476*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_s64_v:
3477*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_u32_v:
3478*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3479*67e74705SXin Li llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3480*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3481*67e74705SXin Li }
3482*67e74705SXin Li case NEON::BI__builtin_neon_vext_v:
3483*67e74705SXin Li case NEON::BI__builtin_neon_vextq_v: {
3484*67e74705SXin Li int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3485*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
3486*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3487*67e74705SXin Li Indices.push_back(i+CV);
3488*67e74705SXin Li
3489*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3490*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3491*67e74705SXin Li return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3492*67e74705SXin Li }
3493*67e74705SXin Li case NEON::BI__builtin_neon_vfma_v:
3494*67e74705SXin Li case NEON::BI__builtin_neon_vfmaq_v: {
3495*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3496*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3497*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3498*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3499*67e74705SXin Li
3500*67e74705SXin Li // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3501*67e74705SXin Li return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3502*67e74705SXin Li }
3503*67e74705SXin Li case NEON::BI__builtin_neon_vld1_v:
3504*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_v: {
3505*67e74705SXin Li llvm::Type *Tys[] = {Ty, Int8PtrTy};
3506*67e74705SXin Li Ops.push_back(getAlignmentValue32(PtrOp0));
3507*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3508*67e74705SXin Li }
3509*67e74705SXin Li case NEON::BI__builtin_neon_vld2_v:
3510*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_v:
3511*67e74705SXin Li case NEON::BI__builtin_neon_vld3_v:
3512*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_v:
3513*67e74705SXin Li case NEON::BI__builtin_neon_vld4_v:
3514*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_v: {
3515*67e74705SXin Li llvm::Type *Tys[] = {Ty, Int8PtrTy};
3516*67e74705SXin Li Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3517*67e74705SXin Li Value *Align = getAlignmentValue32(PtrOp1);
3518*67e74705SXin Li Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3519*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3520*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3521*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3522*67e74705SXin Li }
3523*67e74705SXin Li case NEON::BI__builtin_neon_vld1_dup_v:
3524*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_dup_v: {
3525*67e74705SXin Li Value *V = UndefValue::get(Ty);
3526*67e74705SXin Li Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3527*67e74705SXin Li PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3528*67e74705SXin Li LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3529*67e74705SXin Li llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3530*67e74705SXin Li Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3531*67e74705SXin Li return EmitNeonSplat(Ops[0], CI);
3532*67e74705SXin Li }
3533*67e74705SXin Li case NEON::BI__builtin_neon_vld2_lane_v:
3534*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_lane_v:
3535*67e74705SXin Li case NEON::BI__builtin_neon_vld3_lane_v:
3536*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_lane_v:
3537*67e74705SXin Li case NEON::BI__builtin_neon_vld4_lane_v:
3538*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_lane_v: {
3539*67e74705SXin Li llvm::Type *Tys[] = {Ty, Int8PtrTy};
3540*67e74705SXin Li Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3541*67e74705SXin Li for (unsigned I = 2; I < Ops.size() - 1; ++I)
3542*67e74705SXin Li Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3543*67e74705SXin Li Ops.push_back(getAlignmentValue32(PtrOp1));
3544*67e74705SXin Li Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3545*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3546*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3547*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3548*67e74705SXin Li }
3549*67e74705SXin Li case NEON::BI__builtin_neon_vmovl_v: {
3550*67e74705SXin Li llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3551*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3552*67e74705SXin Li if (Usgn)
3553*67e74705SXin Li return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3554*67e74705SXin Li return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3555*67e74705SXin Li }
3556*67e74705SXin Li case NEON::BI__builtin_neon_vmovn_v: {
3557*67e74705SXin Li llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3558*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3559*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3560*67e74705SXin Li }
3561*67e74705SXin Li case NEON::BI__builtin_neon_vmull_v:
3562*67e74705SXin Li // FIXME: the integer vmull operations could be emitted in terms of pure
3563*67e74705SXin Li // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3564*67e74705SXin Li // hoisting the exts outside loops. Until global ISel comes along that can
3565*67e74705SXin Li // see through such movement this leads to bad CodeGen. So we need an
3566*67e74705SXin Li // intrinsic for now.
3567*67e74705SXin Li Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3568*67e74705SXin Li Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3569*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3570*67e74705SXin Li case NEON::BI__builtin_neon_vpadal_v:
3571*67e74705SXin Li case NEON::BI__builtin_neon_vpadalq_v: {
3572*67e74705SXin Li // The source operand type has twice as many elements of half the size.
3573*67e74705SXin Li unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3574*67e74705SXin Li llvm::Type *EltTy =
3575*67e74705SXin Li llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3576*67e74705SXin Li llvm::Type *NarrowTy =
3577*67e74705SXin Li llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3578*67e74705SXin Li llvm::Type *Tys[2] = { Ty, NarrowTy };
3579*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3580*67e74705SXin Li }
3581*67e74705SXin Li case NEON::BI__builtin_neon_vpaddl_v:
3582*67e74705SXin Li case NEON::BI__builtin_neon_vpaddlq_v: {
3583*67e74705SXin Li // The source operand type has twice as many elements of half the size.
3584*67e74705SXin Li unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3585*67e74705SXin Li llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3586*67e74705SXin Li llvm::Type *NarrowTy =
3587*67e74705SXin Li llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3588*67e74705SXin Li llvm::Type *Tys[2] = { Ty, NarrowTy };
3589*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3590*67e74705SXin Li }
3591*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlal_v:
3592*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlsl_v: {
3593*67e74705SXin Li SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3594*67e74705SXin Li Ops[1] =
3595*67e74705SXin Li EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3596*67e74705SXin Li Ops.resize(2);
3597*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3598*67e74705SXin Li }
3599*67e74705SXin Li case NEON::BI__builtin_neon_vqshl_n_v:
3600*67e74705SXin Li case NEON::BI__builtin_neon_vqshlq_n_v:
3601*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3602*67e74705SXin Li 1, false);
3603*67e74705SXin Li case NEON::BI__builtin_neon_vqshlu_n_v:
3604*67e74705SXin Li case NEON::BI__builtin_neon_vqshluq_n_v:
3605*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3606*67e74705SXin Li 1, false);
3607*67e74705SXin Li case NEON::BI__builtin_neon_vrecpe_v:
3608*67e74705SXin Li case NEON::BI__builtin_neon_vrecpeq_v:
3609*67e74705SXin Li case NEON::BI__builtin_neon_vrsqrte_v:
3610*67e74705SXin Li case NEON::BI__builtin_neon_vrsqrteq_v:
3611*67e74705SXin Li Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3612*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3613*67e74705SXin Li
3614*67e74705SXin Li case NEON::BI__builtin_neon_vrshr_n_v:
3615*67e74705SXin Li case NEON::BI__builtin_neon_vrshrq_n_v:
3616*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3617*67e74705SXin Li 1, true);
3618*67e74705SXin Li case NEON::BI__builtin_neon_vshl_n_v:
3619*67e74705SXin Li case NEON::BI__builtin_neon_vshlq_n_v:
3620*67e74705SXin Li Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3621*67e74705SXin Li return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3622*67e74705SXin Li "vshl_n");
3623*67e74705SXin Li case NEON::BI__builtin_neon_vshll_n_v: {
3624*67e74705SXin Li llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3625*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3626*67e74705SXin Li if (Usgn)
3627*67e74705SXin Li Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3628*67e74705SXin Li else
3629*67e74705SXin Li Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3630*67e74705SXin Li Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3631*67e74705SXin Li return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3632*67e74705SXin Li }
3633*67e74705SXin Li case NEON::BI__builtin_neon_vshrn_n_v: {
3634*67e74705SXin Li llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3635*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3636*67e74705SXin Li Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3637*67e74705SXin Li if (Usgn)
3638*67e74705SXin Li Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3639*67e74705SXin Li else
3640*67e74705SXin Li Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3641*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3642*67e74705SXin Li }
3643*67e74705SXin Li case NEON::BI__builtin_neon_vshr_n_v:
3644*67e74705SXin Li case NEON::BI__builtin_neon_vshrq_n_v:
3645*67e74705SXin Li return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3646*67e74705SXin Li case NEON::BI__builtin_neon_vst1_v:
3647*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_v:
3648*67e74705SXin Li case NEON::BI__builtin_neon_vst2_v:
3649*67e74705SXin Li case NEON::BI__builtin_neon_vst2q_v:
3650*67e74705SXin Li case NEON::BI__builtin_neon_vst3_v:
3651*67e74705SXin Li case NEON::BI__builtin_neon_vst3q_v:
3652*67e74705SXin Li case NEON::BI__builtin_neon_vst4_v:
3653*67e74705SXin Li case NEON::BI__builtin_neon_vst4q_v:
3654*67e74705SXin Li case NEON::BI__builtin_neon_vst2_lane_v:
3655*67e74705SXin Li case NEON::BI__builtin_neon_vst2q_lane_v:
3656*67e74705SXin Li case NEON::BI__builtin_neon_vst3_lane_v:
3657*67e74705SXin Li case NEON::BI__builtin_neon_vst3q_lane_v:
3658*67e74705SXin Li case NEON::BI__builtin_neon_vst4_lane_v:
3659*67e74705SXin Li case NEON::BI__builtin_neon_vst4q_lane_v: {
3660*67e74705SXin Li llvm::Type *Tys[] = {Int8PtrTy, Ty};
3661*67e74705SXin Li Ops.push_back(getAlignmentValue32(PtrOp0));
3662*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3663*67e74705SXin Li }
3664*67e74705SXin Li case NEON::BI__builtin_neon_vsubhn_v: {
3665*67e74705SXin Li llvm::VectorType *SrcTy =
3666*67e74705SXin Li llvm::VectorType::getExtendedElementVectorType(VTy);
3667*67e74705SXin Li
3668*67e74705SXin Li // %sum = sub <4 x i32> %lhs, %rhs
3669*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3670*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3671*67e74705SXin Li Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3672*67e74705SXin Li
3673*67e74705SXin Li // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3674*67e74705SXin Li Constant *ShiftAmt =
3675*67e74705SXin Li ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3676*67e74705SXin Li Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3677*67e74705SXin Li
3678*67e74705SXin Li // %res = trunc <4 x i32> %high to <4 x i16>
3679*67e74705SXin Li return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3680*67e74705SXin Li }
3681*67e74705SXin Li case NEON::BI__builtin_neon_vtrn_v:
3682*67e74705SXin Li case NEON::BI__builtin_neon_vtrnq_v: {
3683*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3684*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3685*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3686*67e74705SXin Li Value *SV = nullptr;
3687*67e74705SXin Li
3688*67e74705SXin Li for (unsigned vi = 0; vi != 2; ++vi) {
3689*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
3690*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3691*67e74705SXin Li Indices.push_back(i+vi);
3692*67e74705SXin Li Indices.push_back(i+e+vi);
3693*67e74705SXin Li }
3694*67e74705SXin Li Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3695*67e74705SXin Li SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
3696*67e74705SXin Li SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3697*67e74705SXin Li }
3698*67e74705SXin Li return SV;
3699*67e74705SXin Li }
3700*67e74705SXin Li case NEON::BI__builtin_neon_vtst_v:
3701*67e74705SXin Li case NEON::BI__builtin_neon_vtstq_v: {
3702*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3703*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3704*67e74705SXin Li Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3705*67e74705SXin Li Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3706*67e74705SXin Li ConstantAggregateZero::get(Ty));
3707*67e74705SXin Li return Builder.CreateSExt(Ops[0], Ty, "vtst");
3708*67e74705SXin Li }
3709*67e74705SXin Li case NEON::BI__builtin_neon_vuzp_v:
3710*67e74705SXin Li case NEON::BI__builtin_neon_vuzpq_v: {
3711*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3712*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3713*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3714*67e74705SXin Li Value *SV = nullptr;
3715*67e74705SXin Li
3716*67e74705SXin Li for (unsigned vi = 0; vi != 2; ++vi) {
3717*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
3718*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3719*67e74705SXin Li Indices.push_back(2*i+vi);
3720*67e74705SXin Li
3721*67e74705SXin Li Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3722*67e74705SXin Li SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
3723*67e74705SXin Li SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3724*67e74705SXin Li }
3725*67e74705SXin Li return SV;
3726*67e74705SXin Li }
3727*67e74705SXin Li case NEON::BI__builtin_neon_vzip_v:
3728*67e74705SXin Li case NEON::BI__builtin_neon_vzipq_v: {
3729*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3730*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3731*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3732*67e74705SXin Li Value *SV = nullptr;
3733*67e74705SXin Li
3734*67e74705SXin Li for (unsigned vi = 0; vi != 2; ++vi) {
3735*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
3736*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3737*67e74705SXin Li Indices.push_back((i + vi*e) >> 1);
3738*67e74705SXin Li Indices.push_back(((i + vi*e) >> 1)+e);
3739*67e74705SXin Li }
3740*67e74705SXin Li Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3741*67e74705SXin Li SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
3742*67e74705SXin Li SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3743*67e74705SXin Li }
3744*67e74705SXin Li return SV;
3745*67e74705SXin Li }
3746*67e74705SXin Li }
3747*67e74705SXin Li
3748*67e74705SXin Li assert(Int && "Expected valid intrinsic number");
3749*67e74705SXin Li
3750*67e74705SXin Li // Determine the type(s) of this overloaded AArch64 intrinsic.
3751*67e74705SXin Li Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
3752*67e74705SXin Li
3753*67e74705SXin Li Value *Result = EmitNeonCall(F, Ops, NameHint);
3754*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
3755*67e74705SXin Li // AArch64 intrinsic one-element vector type cast to
3756*67e74705SXin Li // scalar type expected by the builtin
3757*67e74705SXin Li return Builder.CreateBitCast(Result, ResultType, NameHint);
3758*67e74705SXin Li }
3759*67e74705SXin Li
EmitAArch64CompareBuiltinExpr(Value * Op,llvm::Type * Ty,const CmpInst::Predicate Fp,const CmpInst::Predicate Ip,const Twine & Name)3760*67e74705SXin Li Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3761*67e74705SXin Li Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3762*67e74705SXin Li const CmpInst::Predicate Ip, const Twine &Name) {
3763*67e74705SXin Li llvm::Type *OTy = Op->getType();
3764*67e74705SXin Li
3765*67e74705SXin Li // FIXME: this is utterly horrific. We should not be looking at previous
3766*67e74705SXin Li // codegen context to find out what needs doing. Unfortunately TableGen
3767*67e74705SXin Li // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3768*67e74705SXin Li // (etc).
3769*67e74705SXin Li if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3770*67e74705SXin Li OTy = BI->getOperand(0)->getType();
3771*67e74705SXin Li
3772*67e74705SXin Li Op = Builder.CreateBitCast(Op, OTy);
3773*67e74705SXin Li if (OTy->getScalarType()->isFloatingPointTy()) {
3774*67e74705SXin Li Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3775*67e74705SXin Li } else {
3776*67e74705SXin Li Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3777*67e74705SXin Li }
3778*67e74705SXin Li return Builder.CreateSExt(Op, Ty, Name);
3779*67e74705SXin Li }
3780*67e74705SXin Li
packTBLDVectorList(CodeGenFunction & CGF,ArrayRef<Value * > Ops,Value * ExtOp,Value * IndexOp,llvm::Type * ResTy,unsigned IntID,const char * Name)3781*67e74705SXin Li static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3782*67e74705SXin Li Value *ExtOp, Value *IndexOp,
3783*67e74705SXin Li llvm::Type *ResTy, unsigned IntID,
3784*67e74705SXin Li const char *Name) {
3785*67e74705SXin Li SmallVector<Value *, 2> TblOps;
3786*67e74705SXin Li if (ExtOp)
3787*67e74705SXin Li TblOps.push_back(ExtOp);
3788*67e74705SXin Li
3789*67e74705SXin Li // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3790*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
3791*67e74705SXin Li llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3792*67e74705SXin Li for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3793*67e74705SXin Li Indices.push_back(2*i);
3794*67e74705SXin Li Indices.push_back(2*i+1);
3795*67e74705SXin Li }
3796*67e74705SXin Li
3797*67e74705SXin Li int PairPos = 0, End = Ops.size() - 1;
3798*67e74705SXin Li while (PairPos < End) {
3799*67e74705SXin Li TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3800*67e74705SXin Li Ops[PairPos+1], Indices,
3801*67e74705SXin Li Name));
3802*67e74705SXin Li PairPos += 2;
3803*67e74705SXin Li }
3804*67e74705SXin Li
3805*67e74705SXin Li // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3806*67e74705SXin Li // of the 128-bit lookup table with zero.
3807*67e74705SXin Li if (PairPos == End) {
3808*67e74705SXin Li Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3809*67e74705SXin Li TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3810*67e74705SXin Li ZeroTbl, Indices, Name));
3811*67e74705SXin Li }
3812*67e74705SXin Li
3813*67e74705SXin Li Function *TblF;
3814*67e74705SXin Li TblOps.push_back(IndexOp);
3815*67e74705SXin Li TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3816*67e74705SXin Li
3817*67e74705SXin Li return CGF.EmitNeonCall(TblF, TblOps, Name);
3818*67e74705SXin Li }
3819*67e74705SXin Li
GetValueForARMHint(unsigned BuiltinID)3820*67e74705SXin Li Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3821*67e74705SXin Li unsigned Value;
3822*67e74705SXin Li switch (BuiltinID) {
3823*67e74705SXin Li default:
3824*67e74705SXin Li return nullptr;
3825*67e74705SXin Li case ARM::BI__builtin_arm_nop:
3826*67e74705SXin Li Value = 0;
3827*67e74705SXin Li break;
3828*67e74705SXin Li case ARM::BI__builtin_arm_yield:
3829*67e74705SXin Li case ARM::BI__yield:
3830*67e74705SXin Li Value = 1;
3831*67e74705SXin Li break;
3832*67e74705SXin Li case ARM::BI__builtin_arm_wfe:
3833*67e74705SXin Li case ARM::BI__wfe:
3834*67e74705SXin Li Value = 2;
3835*67e74705SXin Li break;
3836*67e74705SXin Li case ARM::BI__builtin_arm_wfi:
3837*67e74705SXin Li case ARM::BI__wfi:
3838*67e74705SXin Li Value = 3;
3839*67e74705SXin Li break;
3840*67e74705SXin Li case ARM::BI__builtin_arm_sev:
3841*67e74705SXin Li case ARM::BI__sev:
3842*67e74705SXin Li Value = 4;
3843*67e74705SXin Li break;
3844*67e74705SXin Li case ARM::BI__builtin_arm_sevl:
3845*67e74705SXin Li case ARM::BI__sevl:
3846*67e74705SXin Li Value = 5;
3847*67e74705SXin Li break;
3848*67e74705SXin Li }
3849*67e74705SXin Li
3850*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3851*67e74705SXin Li llvm::ConstantInt::get(Int32Ty, Value));
3852*67e74705SXin Li }
3853*67e74705SXin Li
3854*67e74705SXin Li // Generates the IR for the read/write special register builtin,
3855*67e74705SXin Li // ValueType is the type of the value that is to be written or read,
3856*67e74705SXin Li // RegisterType is the type of the register being written to or read from.
EmitSpecialRegisterBuiltin(CodeGenFunction & CGF,const CallExpr * E,llvm::Type * RegisterType,llvm::Type * ValueType,bool IsRead,StringRef SysReg="")3857*67e74705SXin Li static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3858*67e74705SXin Li const CallExpr *E,
3859*67e74705SXin Li llvm::Type *RegisterType,
3860*67e74705SXin Li llvm::Type *ValueType,
3861*67e74705SXin Li bool IsRead,
3862*67e74705SXin Li StringRef SysReg = "") {
3863*67e74705SXin Li // write and register intrinsics only support 32 and 64 bit operations.
3864*67e74705SXin Li assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3865*67e74705SXin Li && "Unsupported size for register.");
3866*67e74705SXin Li
3867*67e74705SXin Li CodeGen::CGBuilderTy &Builder = CGF.Builder;
3868*67e74705SXin Li CodeGen::CodeGenModule &CGM = CGF.CGM;
3869*67e74705SXin Li LLVMContext &Context = CGM.getLLVMContext();
3870*67e74705SXin Li
3871*67e74705SXin Li if (SysReg.empty()) {
3872*67e74705SXin Li const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3873*67e74705SXin Li SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3874*67e74705SXin Li }
3875*67e74705SXin Li
3876*67e74705SXin Li llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3877*67e74705SXin Li llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3878*67e74705SXin Li llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3879*67e74705SXin Li
3880*67e74705SXin Li llvm::Type *Types[] = { RegisterType };
3881*67e74705SXin Li
3882*67e74705SXin Li bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3883*67e74705SXin Li assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3884*67e74705SXin Li && "Can't fit 64-bit value in 32-bit register");
3885*67e74705SXin Li
3886*67e74705SXin Li if (IsRead) {
3887*67e74705SXin Li llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3888*67e74705SXin Li llvm::Value *Call = Builder.CreateCall(F, Metadata);
3889*67e74705SXin Li
3890*67e74705SXin Li if (MixedTypes)
3891*67e74705SXin Li // Read into 64 bit register and then truncate result to 32 bit.
3892*67e74705SXin Li return Builder.CreateTrunc(Call, ValueType);
3893*67e74705SXin Li
3894*67e74705SXin Li if (ValueType->isPointerTy())
3895*67e74705SXin Li // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3896*67e74705SXin Li return Builder.CreateIntToPtr(Call, ValueType);
3897*67e74705SXin Li
3898*67e74705SXin Li return Call;
3899*67e74705SXin Li }
3900*67e74705SXin Li
3901*67e74705SXin Li llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3902*67e74705SXin Li llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3903*67e74705SXin Li if (MixedTypes) {
3904*67e74705SXin Li // Extend 32 bit write value to 64 bit to pass to write.
3905*67e74705SXin Li ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3906*67e74705SXin Li return Builder.CreateCall(F, { Metadata, ArgValue });
3907*67e74705SXin Li }
3908*67e74705SXin Li
3909*67e74705SXin Li if (ValueType->isPointerTy()) {
3910*67e74705SXin Li // Have VoidPtrTy ArgValue but want to return an i32/i64.
3911*67e74705SXin Li ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3912*67e74705SXin Li return Builder.CreateCall(F, { Metadata, ArgValue });
3913*67e74705SXin Li }
3914*67e74705SXin Li
3915*67e74705SXin Li return Builder.CreateCall(F, { Metadata, ArgValue });
3916*67e74705SXin Li }
3917*67e74705SXin Li
3918*67e74705SXin Li /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3919*67e74705SXin Li /// argument that specifies the vector type.
HasExtraNeonArgument(unsigned BuiltinID)3920*67e74705SXin Li static bool HasExtraNeonArgument(unsigned BuiltinID) {
3921*67e74705SXin Li switch (BuiltinID) {
3922*67e74705SXin Li default: break;
3923*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i8:
3924*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i16:
3925*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i32:
3926*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i64:
3927*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_f32:
3928*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i8:
3929*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i16:
3930*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i32:
3931*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i64:
3932*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_f32:
3933*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i8:
3934*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i16:
3935*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i32:
3936*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i64:
3937*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_f32:
3938*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i8:
3939*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i16:
3940*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i32:
3941*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i64:
3942*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_f32:
3943*67e74705SXin Li case NEON::BI__builtin_neon_vsha1h_u32:
3944*67e74705SXin Li case NEON::BI__builtin_neon_vsha1cq_u32:
3945*67e74705SXin Li case NEON::BI__builtin_neon_vsha1pq_u32:
3946*67e74705SXin Li case NEON::BI__builtin_neon_vsha1mq_u32:
3947*67e74705SXin Li case ARM::BI_MoveToCoprocessor:
3948*67e74705SXin Li case ARM::BI_MoveToCoprocessor2:
3949*67e74705SXin Li return false;
3950*67e74705SXin Li }
3951*67e74705SXin Li return true;
3952*67e74705SXin Li }
3953*67e74705SXin Li
EmitARMBuiltinExpr(unsigned BuiltinID,const CallExpr * E)3954*67e74705SXin Li Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3955*67e74705SXin Li const CallExpr *E) {
3956*67e74705SXin Li if (auto Hint = GetValueForARMHint(BuiltinID))
3957*67e74705SXin Li return Hint;
3958*67e74705SXin Li
3959*67e74705SXin Li if (BuiltinID == ARM::BI__emit) {
3960*67e74705SXin Li bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3961*67e74705SXin Li llvm::FunctionType *FTy =
3962*67e74705SXin Li llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3963*67e74705SXin Li
3964*67e74705SXin Li APSInt Value;
3965*67e74705SXin Li if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3966*67e74705SXin Li llvm_unreachable("Sema will ensure that the parameter is constant");
3967*67e74705SXin Li
3968*67e74705SXin Li uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3969*67e74705SXin Li
3970*67e74705SXin Li llvm::InlineAsm *Emit =
3971*67e74705SXin Li IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3972*67e74705SXin Li /*SideEffects=*/true)
3973*67e74705SXin Li : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3974*67e74705SXin Li /*SideEffects=*/true);
3975*67e74705SXin Li
3976*67e74705SXin Li return Builder.CreateCall(Emit);
3977*67e74705SXin Li }
3978*67e74705SXin Li
3979*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3980*67e74705SXin Li Value *Option = EmitScalarExpr(E->getArg(0));
3981*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3982*67e74705SXin Li }
3983*67e74705SXin Li
3984*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3985*67e74705SXin Li Value *Address = EmitScalarExpr(E->getArg(0));
3986*67e74705SXin Li Value *RW = EmitScalarExpr(E->getArg(1));
3987*67e74705SXin Li Value *IsData = EmitScalarExpr(E->getArg(2));
3988*67e74705SXin Li
3989*67e74705SXin Li // Locality is not supported on ARM target
3990*67e74705SXin Li Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3991*67e74705SXin Li
3992*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3993*67e74705SXin Li return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3994*67e74705SXin Li }
3995*67e74705SXin Li
3996*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3997*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3998*67e74705SXin Li EmitScalarExpr(E->getArg(0)),
3999*67e74705SXin Li "rbit");
4000*67e74705SXin Li }
4001*67e74705SXin Li
4002*67e74705SXin Li if (BuiltinID == ARM::BI__clear_cache) {
4003*67e74705SXin Li assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4004*67e74705SXin Li const FunctionDecl *FD = E->getDirectCallee();
4005*67e74705SXin Li Value *Ops[2];
4006*67e74705SXin Li for (unsigned i = 0; i < 2; i++)
4007*67e74705SXin Li Ops[i] = EmitScalarExpr(E->getArg(i));
4008*67e74705SXin Li llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4009*67e74705SXin Li llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4010*67e74705SXin Li StringRef Name = FD->getName();
4011*67e74705SXin Li return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4012*67e74705SXin Li }
4013*67e74705SXin Li
4014*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4015*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4016*67e74705SXin Li Function *F;
4017*67e74705SXin Li
4018*67e74705SXin Li switch (BuiltinID) {
4019*67e74705SXin Li default: llvm_unreachable("unexpected builtin");
4020*67e74705SXin Li case ARM::BI__builtin_arm_mcrr:
4021*67e74705SXin Li F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4022*67e74705SXin Li break;
4023*67e74705SXin Li case ARM::BI__builtin_arm_mcrr2:
4024*67e74705SXin Li F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4025*67e74705SXin Li break;
4026*67e74705SXin Li }
4027*67e74705SXin Li
4028*67e74705SXin Li // MCRR{2} instruction has 5 operands but
4029*67e74705SXin Li // the intrinsic has 4 because Rt and Rt2
4030*67e74705SXin Li // are represented as a single unsigned 64
4031*67e74705SXin Li // bit integer in the intrinsic definition
4032*67e74705SXin Li // but internally it's represented as 2 32
4033*67e74705SXin Li // bit integers.
4034*67e74705SXin Li
4035*67e74705SXin Li Value *Coproc = EmitScalarExpr(E->getArg(0));
4036*67e74705SXin Li Value *Opc1 = EmitScalarExpr(E->getArg(1));
4037*67e74705SXin Li Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4038*67e74705SXin Li Value *CRm = EmitScalarExpr(E->getArg(3));
4039*67e74705SXin Li
4040*67e74705SXin Li Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4041*67e74705SXin Li Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4042*67e74705SXin Li Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4043*67e74705SXin Li Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4044*67e74705SXin Li
4045*67e74705SXin Li return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4046*67e74705SXin Li }
4047*67e74705SXin Li
4048*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4049*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4050*67e74705SXin Li Function *F;
4051*67e74705SXin Li
4052*67e74705SXin Li switch (BuiltinID) {
4053*67e74705SXin Li default: llvm_unreachable("unexpected builtin");
4054*67e74705SXin Li case ARM::BI__builtin_arm_mrrc:
4055*67e74705SXin Li F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4056*67e74705SXin Li break;
4057*67e74705SXin Li case ARM::BI__builtin_arm_mrrc2:
4058*67e74705SXin Li F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4059*67e74705SXin Li break;
4060*67e74705SXin Li }
4061*67e74705SXin Li
4062*67e74705SXin Li Value *Coproc = EmitScalarExpr(E->getArg(0));
4063*67e74705SXin Li Value *Opc1 = EmitScalarExpr(E->getArg(1));
4064*67e74705SXin Li Value *CRm = EmitScalarExpr(E->getArg(2));
4065*67e74705SXin Li Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4066*67e74705SXin Li
4067*67e74705SXin Li // Returns an unsigned 64 bit integer, represented
4068*67e74705SXin Li // as two 32 bit integers.
4069*67e74705SXin Li
4070*67e74705SXin Li Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4071*67e74705SXin Li Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4072*67e74705SXin Li Rt = Builder.CreateZExt(Rt, Int64Ty);
4073*67e74705SXin Li Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4074*67e74705SXin Li
4075*67e74705SXin Li Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4076*67e74705SXin Li RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4077*67e74705SXin Li RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4078*67e74705SXin Li
4079*67e74705SXin Li return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4080*67e74705SXin Li }
4081*67e74705SXin Li
4082*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4083*67e74705SXin Li ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4084*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4085*67e74705SXin Li getContext().getTypeSize(E->getType()) == 64) ||
4086*67e74705SXin Li BuiltinID == ARM::BI__ldrexd) {
4087*67e74705SXin Li Function *F;
4088*67e74705SXin Li
4089*67e74705SXin Li switch (BuiltinID) {
4090*67e74705SXin Li default: llvm_unreachable("unexpected builtin");
4091*67e74705SXin Li case ARM::BI__builtin_arm_ldaex:
4092*67e74705SXin Li F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4093*67e74705SXin Li break;
4094*67e74705SXin Li case ARM::BI__builtin_arm_ldrexd:
4095*67e74705SXin Li case ARM::BI__builtin_arm_ldrex:
4096*67e74705SXin Li case ARM::BI__ldrexd:
4097*67e74705SXin Li F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4098*67e74705SXin Li break;
4099*67e74705SXin Li }
4100*67e74705SXin Li
4101*67e74705SXin Li Value *LdPtr = EmitScalarExpr(E->getArg(0));
4102*67e74705SXin Li Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4103*67e74705SXin Li "ldrexd");
4104*67e74705SXin Li
4105*67e74705SXin Li Value *Val0 = Builder.CreateExtractValue(Val, 1);
4106*67e74705SXin Li Value *Val1 = Builder.CreateExtractValue(Val, 0);
4107*67e74705SXin Li Val0 = Builder.CreateZExt(Val0, Int64Ty);
4108*67e74705SXin Li Val1 = Builder.CreateZExt(Val1, Int64Ty);
4109*67e74705SXin Li
4110*67e74705SXin Li Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4111*67e74705SXin Li Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4112*67e74705SXin Li Val = Builder.CreateOr(Val, Val1);
4113*67e74705SXin Li return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4114*67e74705SXin Li }
4115*67e74705SXin Li
4116*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4117*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_ldaex) {
4118*67e74705SXin Li Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4119*67e74705SXin Li
4120*67e74705SXin Li QualType Ty = E->getType();
4121*67e74705SXin Li llvm::Type *RealResTy = ConvertType(Ty);
4122*67e74705SXin Li llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4123*67e74705SXin Li getContext().getTypeSize(Ty));
4124*67e74705SXin Li LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4125*67e74705SXin Li
4126*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4127*67e74705SXin Li ? Intrinsic::arm_ldaex
4128*67e74705SXin Li : Intrinsic::arm_ldrex,
4129*67e74705SXin Li LoadAddr->getType());
4130*67e74705SXin Li Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4131*67e74705SXin Li
4132*67e74705SXin Li if (RealResTy->isPointerTy())
4133*67e74705SXin Li return Builder.CreateIntToPtr(Val, RealResTy);
4134*67e74705SXin Li else {
4135*67e74705SXin Li Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4136*67e74705SXin Li return Builder.CreateBitCast(Val, RealResTy);
4137*67e74705SXin Li }
4138*67e74705SXin Li }
4139*67e74705SXin Li
4140*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4141*67e74705SXin Li ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4142*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_strex) &&
4143*67e74705SXin Li getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4144*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4145*67e74705SXin Li ? Intrinsic::arm_stlexd
4146*67e74705SXin Li : Intrinsic::arm_strexd);
4147*67e74705SXin Li llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4148*67e74705SXin Li
4149*67e74705SXin Li Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4150*67e74705SXin Li Value *Val = EmitScalarExpr(E->getArg(0));
4151*67e74705SXin Li Builder.CreateStore(Val, Tmp);
4152*67e74705SXin Li
4153*67e74705SXin Li Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4154*67e74705SXin Li Val = Builder.CreateLoad(LdPtr);
4155*67e74705SXin Li
4156*67e74705SXin Li Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4157*67e74705SXin Li Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4158*67e74705SXin Li Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4159*67e74705SXin Li return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4160*67e74705SXin Li }
4161*67e74705SXin Li
4162*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_strex ||
4163*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_stlex) {
4164*67e74705SXin Li Value *StoreVal = EmitScalarExpr(E->getArg(0));
4165*67e74705SXin Li Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4166*67e74705SXin Li
4167*67e74705SXin Li QualType Ty = E->getArg(0)->getType();
4168*67e74705SXin Li llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4169*67e74705SXin Li getContext().getTypeSize(Ty));
4170*67e74705SXin Li StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4171*67e74705SXin Li
4172*67e74705SXin Li if (StoreVal->getType()->isPointerTy())
4173*67e74705SXin Li StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4174*67e74705SXin Li else {
4175*67e74705SXin Li StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4176*67e74705SXin Li StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4177*67e74705SXin Li }
4178*67e74705SXin Li
4179*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4180*67e74705SXin Li ? Intrinsic::arm_stlex
4181*67e74705SXin Li : Intrinsic::arm_strex,
4182*67e74705SXin Li StoreAddr->getType());
4183*67e74705SXin Li return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4184*67e74705SXin Li }
4185*67e74705SXin Li
4186*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4187*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4188*67e74705SXin Li return Builder.CreateCall(F);
4189*67e74705SXin Li }
4190*67e74705SXin Li
4191*67e74705SXin Li // CRC32
4192*67e74705SXin Li Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4193*67e74705SXin Li switch (BuiltinID) {
4194*67e74705SXin Li case ARM::BI__builtin_arm_crc32b:
4195*67e74705SXin Li CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4196*67e74705SXin Li case ARM::BI__builtin_arm_crc32cb:
4197*67e74705SXin Li CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4198*67e74705SXin Li case ARM::BI__builtin_arm_crc32h:
4199*67e74705SXin Li CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4200*67e74705SXin Li case ARM::BI__builtin_arm_crc32ch:
4201*67e74705SXin Li CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4202*67e74705SXin Li case ARM::BI__builtin_arm_crc32w:
4203*67e74705SXin Li case ARM::BI__builtin_arm_crc32d:
4204*67e74705SXin Li CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4205*67e74705SXin Li case ARM::BI__builtin_arm_crc32cw:
4206*67e74705SXin Li case ARM::BI__builtin_arm_crc32cd:
4207*67e74705SXin Li CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4208*67e74705SXin Li }
4209*67e74705SXin Li
4210*67e74705SXin Li if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4211*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0));
4212*67e74705SXin Li Value *Arg1 = EmitScalarExpr(E->getArg(1));
4213*67e74705SXin Li
4214*67e74705SXin Li // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w
4215*67e74705SXin Li // intrinsics, hence we need different codegen for these cases.
4216*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4217*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4218*67e74705SXin Li Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4219*67e74705SXin Li Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4220*67e74705SXin Li Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4221*67e74705SXin Li Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4222*67e74705SXin Li
4223*67e74705SXin Li Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4224*67e74705SXin Li Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4225*67e74705SXin Li return Builder.CreateCall(F, {Res, Arg1b});
4226*67e74705SXin Li } else {
4227*67e74705SXin Li Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4228*67e74705SXin Li
4229*67e74705SXin Li Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4230*67e74705SXin Li return Builder.CreateCall(F, {Arg0, Arg1});
4231*67e74705SXin Li }
4232*67e74705SXin Li }
4233*67e74705SXin Li
4234*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4235*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4236*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_rsrp ||
4237*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_wsr ||
4238*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4239*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_wsrp) {
4240*67e74705SXin Li
4241*67e74705SXin Li bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4242*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4243*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_rsrp;
4244*67e74705SXin Li
4245*67e74705SXin Li bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4246*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_wsrp;
4247*67e74705SXin Li
4248*67e74705SXin Li bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4249*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_wsr64;
4250*67e74705SXin Li
4251*67e74705SXin Li llvm::Type *ValueType;
4252*67e74705SXin Li llvm::Type *RegisterType;
4253*67e74705SXin Li if (IsPointerBuiltin) {
4254*67e74705SXin Li ValueType = VoidPtrTy;
4255*67e74705SXin Li RegisterType = Int32Ty;
4256*67e74705SXin Li } else if (Is64Bit) {
4257*67e74705SXin Li ValueType = RegisterType = Int64Ty;
4258*67e74705SXin Li } else {
4259*67e74705SXin Li ValueType = RegisterType = Int32Ty;
4260*67e74705SXin Li }
4261*67e74705SXin Li
4262*67e74705SXin Li return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4263*67e74705SXin Li }
4264*67e74705SXin Li
4265*67e74705SXin Li // Find out if any arguments are required to be integer constant
4266*67e74705SXin Li // expressions.
4267*67e74705SXin Li unsigned ICEArguments = 0;
4268*67e74705SXin Li ASTContext::GetBuiltinTypeError Error;
4269*67e74705SXin Li getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4270*67e74705SXin Li assert(Error == ASTContext::GE_None && "Should not codegen an error");
4271*67e74705SXin Li
4272*67e74705SXin Li auto getAlignmentValue32 = [&](Address addr) -> Value* {
4273*67e74705SXin Li return Builder.getInt32(addr.getAlignment().getQuantity());
4274*67e74705SXin Li };
4275*67e74705SXin Li
4276*67e74705SXin Li Address PtrOp0 = Address::invalid();
4277*67e74705SXin Li Address PtrOp1 = Address::invalid();
4278*67e74705SXin Li SmallVector<Value*, 4> Ops;
4279*67e74705SXin Li bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4280*67e74705SXin Li unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4281*67e74705SXin Li for (unsigned i = 0, e = NumArgs; i != e; i++) {
4282*67e74705SXin Li if (i == 0) {
4283*67e74705SXin Li switch (BuiltinID) {
4284*67e74705SXin Li case NEON::BI__builtin_neon_vld1_v:
4285*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_v:
4286*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_lane_v:
4287*67e74705SXin Li case NEON::BI__builtin_neon_vld1_lane_v:
4288*67e74705SXin Li case NEON::BI__builtin_neon_vld1_dup_v:
4289*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_dup_v:
4290*67e74705SXin Li case NEON::BI__builtin_neon_vst1_v:
4291*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_v:
4292*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_lane_v:
4293*67e74705SXin Li case NEON::BI__builtin_neon_vst1_lane_v:
4294*67e74705SXin Li case NEON::BI__builtin_neon_vst2_v:
4295*67e74705SXin Li case NEON::BI__builtin_neon_vst2q_v:
4296*67e74705SXin Li case NEON::BI__builtin_neon_vst2_lane_v:
4297*67e74705SXin Li case NEON::BI__builtin_neon_vst2q_lane_v:
4298*67e74705SXin Li case NEON::BI__builtin_neon_vst3_v:
4299*67e74705SXin Li case NEON::BI__builtin_neon_vst3q_v:
4300*67e74705SXin Li case NEON::BI__builtin_neon_vst3_lane_v:
4301*67e74705SXin Li case NEON::BI__builtin_neon_vst3q_lane_v:
4302*67e74705SXin Li case NEON::BI__builtin_neon_vst4_v:
4303*67e74705SXin Li case NEON::BI__builtin_neon_vst4q_v:
4304*67e74705SXin Li case NEON::BI__builtin_neon_vst4_lane_v:
4305*67e74705SXin Li case NEON::BI__builtin_neon_vst4q_lane_v:
4306*67e74705SXin Li // Get the alignment for the argument in addition to the value;
4307*67e74705SXin Li // we'll use it later.
4308*67e74705SXin Li PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4309*67e74705SXin Li Ops.push_back(PtrOp0.getPointer());
4310*67e74705SXin Li continue;
4311*67e74705SXin Li }
4312*67e74705SXin Li }
4313*67e74705SXin Li if (i == 1) {
4314*67e74705SXin Li switch (BuiltinID) {
4315*67e74705SXin Li case NEON::BI__builtin_neon_vld2_v:
4316*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_v:
4317*67e74705SXin Li case NEON::BI__builtin_neon_vld3_v:
4318*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_v:
4319*67e74705SXin Li case NEON::BI__builtin_neon_vld4_v:
4320*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_v:
4321*67e74705SXin Li case NEON::BI__builtin_neon_vld2_lane_v:
4322*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_lane_v:
4323*67e74705SXin Li case NEON::BI__builtin_neon_vld3_lane_v:
4324*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_lane_v:
4325*67e74705SXin Li case NEON::BI__builtin_neon_vld4_lane_v:
4326*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_lane_v:
4327*67e74705SXin Li case NEON::BI__builtin_neon_vld2_dup_v:
4328*67e74705SXin Li case NEON::BI__builtin_neon_vld3_dup_v:
4329*67e74705SXin Li case NEON::BI__builtin_neon_vld4_dup_v:
4330*67e74705SXin Li // Get the alignment for the argument in addition to the value;
4331*67e74705SXin Li // we'll use it later.
4332*67e74705SXin Li PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4333*67e74705SXin Li Ops.push_back(PtrOp1.getPointer());
4334*67e74705SXin Li continue;
4335*67e74705SXin Li }
4336*67e74705SXin Li }
4337*67e74705SXin Li
4338*67e74705SXin Li if ((ICEArguments & (1 << i)) == 0) {
4339*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(i)));
4340*67e74705SXin Li } else {
4341*67e74705SXin Li // If this is required to be a constant, constant fold it so that we know
4342*67e74705SXin Li // that the generated intrinsic gets a ConstantInt.
4343*67e74705SXin Li llvm::APSInt Result;
4344*67e74705SXin Li bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4345*67e74705SXin Li assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4346*67e74705SXin Li Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4347*67e74705SXin Li }
4348*67e74705SXin Li }
4349*67e74705SXin Li
4350*67e74705SXin Li switch (BuiltinID) {
4351*67e74705SXin Li default: break;
4352*67e74705SXin Li
4353*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i8:
4354*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i16:
4355*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i32:
4356*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i64:
4357*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_f32:
4358*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i8:
4359*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i16:
4360*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i32:
4361*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i64:
4362*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_f32:
4363*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4364*67e74705SXin Li
4365*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i8:
4366*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i16:
4367*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i32:
4368*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i64:
4369*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_f32:
4370*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i8:
4371*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i16:
4372*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i32:
4373*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i64:
4374*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_f32:
4375*67e74705SXin Li return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4376*67e74705SXin Li
4377*67e74705SXin Li case NEON::BI__builtin_neon_vsha1h_u32:
4378*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4379*67e74705SXin Li "vsha1h");
4380*67e74705SXin Li case NEON::BI__builtin_neon_vsha1cq_u32:
4381*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4382*67e74705SXin Li "vsha1h");
4383*67e74705SXin Li case NEON::BI__builtin_neon_vsha1pq_u32:
4384*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4385*67e74705SXin Li "vsha1h");
4386*67e74705SXin Li case NEON::BI__builtin_neon_vsha1mq_u32:
4387*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4388*67e74705SXin Li "vsha1h");
4389*67e74705SXin Li
4390*67e74705SXin Li // The ARM _MoveToCoprocessor builtins put the input register value as
4391*67e74705SXin Li // the first argument, but the LLVM intrinsic expects it as the third one.
4392*67e74705SXin Li case ARM::BI_MoveToCoprocessor:
4393*67e74705SXin Li case ARM::BI_MoveToCoprocessor2: {
4394*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4395*67e74705SXin Li Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4396*67e74705SXin Li return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4397*67e74705SXin Li Ops[3], Ops[4], Ops[5]});
4398*67e74705SXin Li }
4399*67e74705SXin Li }
4400*67e74705SXin Li
4401*67e74705SXin Li // Get the last argument, which specifies the vector type.
4402*67e74705SXin Li assert(HasExtraArg);
4403*67e74705SXin Li llvm::APSInt Result;
4404*67e74705SXin Li const Expr *Arg = E->getArg(E->getNumArgs()-1);
4405*67e74705SXin Li if (!Arg->isIntegerConstantExpr(Result, getContext()))
4406*67e74705SXin Li return nullptr;
4407*67e74705SXin Li
4408*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4409*67e74705SXin Li BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4410*67e74705SXin Li // Determine the overloaded type of this builtin.
4411*67e74705SXin Li llvm::Type *Ty;
4412*67e74705SXin Li if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4413*67e74705SXin Li Ty = FloatTy;
4414*67e74705SXin Li else
4415*67e74705SXin Li Ty = DoubleTy;
4416*67e74705SXin Li
4417*67e74705SXin Li // Determine whether this is an unsigned conversion or not.
4418*67e74705SXin Li bool usgn = Result.getZExtValue() == 1;
4419*67e74705SXin Li unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4420*67e74705SXin Li
4421*67e74705SXin Li // Call the appropriate intrinsic.
4422*67e74705SXin Li Function *F = CGM.getIntrinsic(Int, Ty);
4423*67e74705SXin Li return Builder.CreateCall(F, Ops, "vcvtr");
4424*67e74705SXin Li }
4425*67e74705SXin Li
4426*67e74705SXin Li // Determine the type of this overloaded NEON intrinsic.
4427*67e74705SXin Li NeonTypeFlags Type(Result.getZExtValue());
4428*67e74705SXin Li bool usgn = Type.isUnsigned();
4429*67e74705SXin Li bool rightShift = false;
4430*67e74705SXin Li
4431*67e74705SXin Li llvm::VectorType *VTy = GetNeonType(this, Type);
4432*67e74705SXin Li llvm::Type *Ty = VTy;
4433*67e74705SXin Li if (!Ty)
4434*67e74705SXin Li return nullptr;
4435*67e74705SXin Li
4436*67e74705SXin Li // Many NEON builtins have identical semantics and uses in ARM and
4437*67e74705SXin Li // AArch64. Emit these in a single function.
4438*67e74705SXin Li auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4439*67e74705SXin Li const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4440*67e74705SXin Li IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4441*67e74705SXin Li if (Builtin)
4442*67e74705SXin Li return EmitCommonNeonBuiltinExpr(
4443*67e74705SXin Li Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4444*67e74705SXin Li Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4445*67e74705SXin Li
4446*67e74705SXin Li unsigned Int;
4447*67e74705SXin Li switch (BuiltinID) {
4448*67e74705SXin Li default: return nullptr;
4449*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_lane_v:
4450*67e74705SXin Li // Handle 64-bit integer elements as a special case. Use shuffles of
4451*67e74705SXin Li // one-element vectors to avoid poor code for i64 in the backend.
4452*67e74705SXin Li if (VTy->getElementType()->isIntegerTy(64)) {
4453*67e74705SXin Li // Extract the other lane.
4454*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4455*67e74705SXin Li uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4456*67e74705SXin Li Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4457*67e74705SXin Li Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4458*67e74705SXin Li // Load the value as a one-element vector.
4459*67e74705SXin Li Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4460*67e74705SXin Li llvm::Type *Tys[] = {Ty, Int8PtrTy};
4461*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4462*67e74705SXin Li Value *Align = getAlignmentValue32(PtrOp0);
4463*67e74705SXin Li Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4464*67e74705SXin Li // Combine them.
4465*67e74705SXin Li uint32_t Indices[] = {1 - Lane, Lane};
4466*67e74705SXin Li SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4467*67e74705SXin Li return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4468*67e74705SXin Li }
4469*67e74705SXin Li // fall through
4470*67e74705SXin Li case NEON::BI__builtin_neon_vld1_lane_v: {
4471*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4472*67e74705SXin Li PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4473*67e74705SXin Li Value *Ld = Builder.CreateLoad(PtrOp0);
4474*67e74705SXin Li return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4475*67e74705SXin Li }
4476*67e74705SXin Li case NEON::BI__builtin_neon_vld2_dup_v:
4477*67e74705SXin Li case NEON::BI__builtin_neon_vld3_dup_v:
4478*67e74705SXin Li case NEON::BI__builtin_neon_vld4_dup_v: {
4479*67e74705SXin Li // Handle 64-bit elements as a special-case. There is no "dup" needed.
4480*67e74705SXin Li if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4481*67e74705SXin Li switch (BuiltinID) {
4482*67e74705SXin Li case NEON::BI__builtin_neon_vld2_dup_v:
4483*67e74705SXin Li Int = Intrinsic::arm_neon_vld2;
4484*67e74705SXin Li break;
4485*67e74705SXin Li case NEON::BI__builtin_neon_vld3_dup_v:
4486*67e74705SXin Li Int = Intrinsic::arm_neon_vld3;
4487*67e74705SXin Li break;
4488*67e74705SXin Li case NEON::BI__builtin_neon_vld4_dup_v:
4489*67e74705SXin Li Int = Intrinsic::arm_neon_vld4;
4490*67e74705SXin Li break;
4491*67e74705SXin Li default: llvm_unreachable("unknown vld_dup intrinsic?");
4492*67e74705SXin Li }
4493*67e74705SXin Li llvm::Type *Tys[] = {Ty, Int8PtrTy};
4494*67e74705SXin Li Function *F = CGM.getIntrinsic(Int, Tys);
4495*67e74705SXin Li llvm::Value *Align = getAlignmentValue32(PtrOp1);
4496*67e74705SXin Li Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4497*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4498*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4499*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4500*67e74705SXin Li }
4501*67e74705SXin Li switch (BuiltinID) {
4502*67e74705SXin Li case NEON::BI__builtin_neon_vld2_dup_v:
4503*67e74705SXin Li Int = Intrinsic::arm_neon_vld2lane;
4504*67e74705SXin Li break;
4505*67e74705SXin Li case NEON::BI__builtin_neon_vld3_dup_v:
4506*67e74705SXin Li Int = Intrinsic::arm_neon_vld3lane;
4507*67e74705SXin Li break;
4508*67e74705SXin Li case NEON::BI__builtin_neon_vld4_dup_v:
4509*67e74705SXin Li Int = Intrinsic::arm_neon_vld4lane;
4510*67e74705SXin Li break;
4511*67e74705SXin Li default: llvm_unreachable("unknown vld_dup intrinsic?");
4512*67e74705SXin Li }
4513*67e74705SXin Li llvm::Type *Tys[] = {Ty, Int8PtrTy};
4514*67e74705SXin Li Function *F = CGM.getIntrinsic(Int, Tys);
4515*67e74705SXin Li llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4516*67e74705SXin Li
4517*67e74705SXin Li SmallVector<Value*, 6> Args;
4518*67e74705SXin Li Args.push_back(Ops[1]);
4519*67e74705SXin Li Args.append(STy->getNumElements(), UndefValue::get(Ty));
4520*67e74705SXin Li
4521*67e74705SXin Li llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4522*67e74705SXin Li Args.push_back(CI);
4523*67e74705SXin Li Args.push_back(getAlignmentValue32(PtrOp1));
4524*67e74705SXin Li
4525*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4526*67e74705SXin Li // splat lane 0 to all elts in each vector of the result.
4527*67e74705SXin Li for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4528*67e74705SXin Li Value *Val = Builder.CreateExtractValue(Ops[1], i);
4529*67e74705SXin Li Value *Elt = Builder.CreateBitCast(Val, Ty);
4530*67e74705SXin Li Elt = EmitNeonSplat(Elt, CI);
4531*67e74705SXin Li Elt = Builder.CreateBitCast(Elt, Val->getType());
4532*67e74705SXin Li Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4533*67e74705SXin Li }
4534*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4535*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4536*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4537*67e74705SXin Li }
4538*67e74705SXin Li case NEON::BI__builtin_neon_vqrshrn_n_v:
4539*67e74705SXin Li Int =
4540*67e74705SXin Li usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4541*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4542*67e74705SXin Li 1, true);
4543*67e74705SXin Li case NEON::BI__builtin_neon_vqrshrun_n_v:
4544*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4545*67e74705SXin Li Ops, "vqrshrun_n", 1, true);
4546*67e74705SXin Li case NEON::BI__builtin_neon_vqshrn_n_v:
4547*67e74705SXin Li Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4548*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4549*67e74705SXin Li 1, true);
4550*67e74705SXin Li case NEON::BI__builtin_neon_vqshrun_n_v:
4551*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4552*67e74705SXin Li Ops, "vqshrun_n", 1, true);
4553*67e74705SXin Li case NEON::BI__builtin_neon_vrecpe_v:
4554*67e74705SXin Li case NEON::BI__builtin_neon_vrecpeq_v:
4555*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4556*67e74705SXin Li Ops, "vrecpe");
4557*67e74705SXin Li case NEON::BI__builtin_neon_vrshrn_n_v:
4558*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4559*67e74705SXin Li Ops, "vrshrn_n", 1, true);
4560*67e74705SXin Li case NEON::BI__builtin_neon_vrsra_n_v:
4561*67e74705SXin Li case NEON::BI__builtin_neon_vrsraq_n_v:
4562*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4563*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4564*67e74705SXin Li Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4565*67e74705SXin Li Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4566*67e74705SXin Li Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4567*67e74705SXin Li return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4568*67e74705SXin Li case NEON::BI__builtin_neon_vsri_n_v:
4569*67e74705SXin Li case NEON::BI__builtin_neon_vsriq_n_v:
4570*67e74705SXin Li rightShift = true;
4571*67e74705SXin Li case NEON::BI__builtin_neon_vsli_n_v:
4572*67e74705SXin Li case NEON::BI__builtin_neon_vsliq_n_v:
4573*67e74705SXin Li Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4574*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4575*67e74705SXin Li Ops, "vsli_n");
4576*67e74705SXin Li case NEON::BI__builtin_neon_vsra_n_v:
4577*67e74705SXin Li case NEON::BI__builtin_neon_vsraq_n_v:
4578*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4579*67e74705SXin Li Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4580*67e74705SXin Li return Builder.CreateAdd(Ops[0], Ops[1]);
4581*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_lane_v:
4582*67e74705SXin Li // Handle 64-bit integer elements as a special case. Use a shuffle to get
4583*67e74705SXin Li // a one-element vector and avoid poor code for i64 in the backend.
4584*67e74705SXin Li if (VTy->getElementType()->isIntegerTy(64)) {
4585*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4586*67e74705SXin Li Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4587*67e74705SXin Li Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4588*67e74705SXin Li Ops[2] = getAlignmentValue32(PtrOp0);
4589*67e74705SXin Li llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4590*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4591*67e74705SXin Li Tys), Ops);
4592*67e74705SXin Li }
4593*67e74705SXin Li // fall through
4594*67e74705SXin Li case NEON::BI__builtin_neon_vst1_lane_v: {
4595*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4596*67e74705SXin Li Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4597*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4598*67e74705SXin Li auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4599*67e74705SXin Li return St;
4600*67e74705SXin Li }
4601*67e74705SXin Li case NEON::BI__builtin_neon_vtbl1_v:
4602*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4603*67e74705SXin Li Ops, "vtbl1");
4604*67e74705SXin Li case NEON::BI__builtin_neon_vtbl2_v:
4605*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4606*67e74705SXin Li Ops, "vtbl2");
4607*67e74705SXin Li case NEON::BI__builtin_neon_vtbl3_v:
4608*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4609*67e74705SXin Li Ops, "vtbl3");
4610*67e74705SXin Li case NEON::BI__builtin_neon_vtbl4_v:
4611*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4612*67e74705SXin Li Ops, "vtbl4");
4613*67e74705SXin Li case NEON::BI__builtin_neon_vtbx1_v:
4614*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4615*67e74705SXin Li Ops, "vtbx1");
4616*67e74705SXin Li case NEON::BI__builtin_neon_vtbx2_v:
4617*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4618*67e74705SXin Li Ops, "vtbx2");
4619*67e74705SXin Li case NEON::BI__builtin_neon_vtbx3_v:
4620*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4621*67e74705SXin Li Ops, "vtbx3");
4622*67e74705SXin Li case NEON::BI__builtin_neon_vtbx4_v:
4623*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4624*67e74705SXin Li Ops, "vtbx4");
4625*67e74705SXin Li }
4626*67e74705SXin Li }
4627*67e74705SXin Li
EmitAArch64TblBuiltinExpr(CodeGenFunction & CGF,unsigned BuiltinID,const CallExpr * E,SmallVectorImpl<Value * > & Ops)4628*67e74705SXin Li static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4629*67e74705SXin Li const CallExpr *E,
4630*67e74705SXin Li SmallVectorImpl<Value *> &Ops) {
4631*67e74705SXin Li unsigned int Int = 0;
4632*67e74705SXin Li const char *s = nullptr;
4633*67e74705SXin Li
4634*67e74705SXin Li switch (BuiltinID) {
4635*67e74705SXin Li default:
4636*67e74705SXin Li return nullptr;
4637*67e74705SXin Li case NEON::BI__builtin_neon_vtbl1_v:
4638*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl1_v:
4639*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl1q_v:
4640*67e74705SXin Li case NEON::BI__builtin_neon_vtbl2_v:
4641*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl2_v:
4642*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl2q_v:
4643*67e74705SXin Li case NEON::BI__builtin_neon_vtbl3_v:
4644*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl3_v:
4645*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl3q_v:
4646*67e74705SXin Li case NEON::BI__builtin_neon_vtbl4_v:
4647*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl4_v:
4648*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl4q_v:
4649*67e74705SXin Li break;
4650*67e74705SXin Li case NEON::BI__builtin_neon_vtbx1_v:
4651*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx1_v:
4652*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx1q_v:
4653*67e74705SXin Li case NEON::BI__builtin_neon_vtbx2_v:
4654*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx2_v:
4655*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx2q_v:
4656*67e74705SXin Li case NEON::BI__builtin_neon_vtbx3_v:
4657*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx3_v:
4658*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx3q_v:
4659*67e74705SXin Li case NEON::BI__builtin_neon_vtbx4_v:
4660*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx4_v:
4661*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx4q_v:
4662*67e74705SXin Li break;
4663*67e74705SXin Li }
4664*67e74705SXin Li
4665*67e74705SXin Li assert(E->getNumArgs() >= 3);
4666*67e74705SXin Li
4667*67e74705SXin Li // Get the last argument, which specifies the vector type.
4668*67e74705SXin Li llvm::APSInt Result;
4669*67e74705SXin Li const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4670*67e74705SXin Li if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4671*67e74705SXin Li return nullptr;
4672*67e74705SXin Li
4673*67e74705SXin Li // Determine the type of this overloaded NEON intrinsic.
4674*67e74705SXin Li NeonTypeFlags Type(Result.getZExtValue());
4675*67e74705SXin Li llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4676*67e74705SXin Li if (!Ty)
4677*67e74705SXin Li return nullptr;
4678*67e74705SXin Li
4679*67e74705SXin Li CodeGen::CGBuilderTy &Builder = CGF.Builder;
4680*67e74705SXin Li
4681*67e74705SXin Li // AArch64 scalar builtins are not overloaded, they do not have an extra
4682*67e74705SXin Li // argument that specifies the vector type, need to handle each case.
4683*67e74705SXin Li switch (BuiltinID) {
4684*67e74705SXin Li case NEON::BI__builtin_neon_vtbl1_v: {
4685*67e74705SXin Li return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4686*67e74705SXin Li Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4687*67e74705SXin Li "vtbl1");
4688*67e74705SXin Li }
4689*67e74705SXin Li case NEON::BI__builtin_neon_vtbl2_v: {
4690*67e74705SXin Li return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4691*67e74705SXin Li Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4692*67e74705SXin Li "vtbl1");
4693*67e74705SXin Li }
4694*67e74705SXin Li case NEON::BI__builtin_neon_vtbl3_v: {
4695*67e74705SXin Li return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4696*67e74705SXin Li Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4697*67e74705SXin Li "vtbl2");
4698*67e74705SXin Li }
4699*67e74705SXin Li case NEON::BI__builtin_neon_vtbl4_v: {
4700*67e74705SXin Li return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4701*67e74705SXin Li Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4702*67e74705SXin Li "vtbl2");
4703*67e74705SXin Li }
4704*67e74705SXin Li case NEON::BI__builtin_neon_vtbx1_v: {
4705*67e74705SXin Li Value *TblRes =
4706*67e74705SXin Li packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4707*67e74705SXin Li Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4708*67e74705SXin Li
4709*67e74705SXin Li llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4710*67e74705SXin Li Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4711*67e74705SXin Li CmpRes = Builder.CreateSExt(CmpRes, Ty);
4712*67e74705SXin Li
4713*67e74705SXin Li Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4714*67e74705SXin Li Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4715*67e74705SXin Li return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4716*67e74705SXin Li }
4717*67e74705SXin Li case NEON::BI__builtin_neon_vtbx2_v: {
4718*67e74705SXin Li return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4719*67e74705SXin Li Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4720*67e74705SXin Li "vtbx1");
4721*67e74705SXin Li }
4722*67e74705SXin Li case NEON::BI__builtin_neon_vtbx3_v: {
4723*67e74705SXin Li Value *TblRes =
4724*67e74705SXin Li packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4725*67e74705SXin Li Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4726*67e74705SXin Li
4727*67e74705SXin Li llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4728*67e74705SXin Li Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4729*67e74705SXin Li TwentyFourV);
4730*67e74705SXin Li CmpRes = Builder.CreateSExt(CmpRes, Ty);
4731*67e74705SXin Li
4732*67e74705SXin Li Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4733*67e74705SXin Li Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4734*67e74705SXin Li return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4735*67e74705SXin Li }
4736*67e74705SXin Li case NEON::BI__builtin_neon_vtbx4_v: {
4737*67e74705SXin Li return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4738*67e74705SXin Li Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4739*67e74705SXin Li "vtbx2");
4740*67e74705SXin Li }
4741*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl1_v:
4742*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl1q_v:
4743*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4744*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl2_v:
4745*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl2q_v: {
4746*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4747*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl3_v:
4748*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl3q_v:
4749*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4750*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl4_v:
4751*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl4q_v:
4752*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4753*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx1_v:
4754*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx1q_v:
4755*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4756*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx2_v:
4757*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx2q_v:
4758*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4759*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx3_v:
4760*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx3q_v:
4761*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4762*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx4_v:
4763*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx4q_v:
4764*67e74705SXin Li Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4765*67e74705SXin Li }
4766*67e74705SXin Li }
4767*67e74705SXin Li
4768*67e74705SXin Li if (!Int)
4769*67e74705SXin Li return nullptr;
4770*67e74705SXin Li
4771*67e74705SXin Li Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4772*67e74705SXin Li return CGF.EmitNeonCall(F, Ops, s);
4773*67e74705SXin Li }
4774*67e74705SXin Li
vectorWrapScalar16(Value * Op)4775*67e74705SXin Li Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4776*67e74705SXin Li llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4777*67e74705SXin Li Op = Builder.CreateBitCast(Op, Int16Ty);
4778*67e74705SXin Li Value *V = UndefValue::get(VTy);
4779*67e74705SXin Li llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4780*67e74705SXin Li Op = Builder.CreateInsertElement(V, Op, CI);
4781*67e74705SXin Li return Op;
4782*67e74705SXin Li }
4783*67e74705SXin Li
EmitAArch64BuiltinExpr(unsigned BuiltinID,const CallExpr * E)4784*67e74705SXin Li Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4785*67e74705SXin Li const CallExpr *E) {
4786*67e74705SXin Li unsigned HintID = static_cast<unsigned>(-1);
4787*67e74705SXin Li switch (BuiltinID) {
4788*67e74705SXin Li default: break;
4789*67e74705SXin Li case AArch64::BI__builtin_arm_nop:
4790*67e74705SXin Li HintID = 0;
4791*67e74705SXin Li break;
4792*67e74705SXin Li case AArch64::BI__builtin_arm_yield:
4793*67e74705SXin Li HintID = 1;
4794*67e74705SXin Li break;
4795*67e74705SXin Li case AArch64::BI__builtin_arm_wfe:
4796*67e74705SXin Li HintID = 2;
4797*67e74705SXin Li break;
4798*67e74705SXin Li case AArch64::BI__builtin_arm_wfi:
4799*67e74705SXin Li HintID = 3;
4800*67e74705SXin Li break;
4801*67e74705SXin Li case AArch64::BI__builtin_arm_sev:
4802*67e74705SXin Li HintID = 4;
4803*67e74705SXin Li break;
4804*67e74705SXin Li case AArch64::BI__builtin_arm_sevl:
4805*67e74705SXin Li HintID = 5;
4806*67e74705SXin Li break;
4807*67e74705SXin Li }
4808*67e74705SXin Li
4809*67e74705SXin Li if (HintID != static_cast<unsigned>(-1)) {
4810*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4811*67e74705SXin Li return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4812*67e74705SXin Li }
4813*67e74705SXin Li
4814*67e74705SXin Li if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4815*67e74705SXin Li Value *Address = EmitScalarExpr(E->getArg(0));
4816*67e74705SXin Li Value *RW = EmitScalarExpr(E->getArg(1));
4817*67e74705SXin Li Value *CacheLevel = EmitScalarExpr(E->getArg(2));
4818*67e74705SXin Li Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4819*67e74705SXin Li Value *IsData = EmitScalarExpr(E->getArg(4));
4820*67e74705SXin Li
4821*67e74705SXin Li Value *Locality = nullptr;
4822*67e74705SXin Li if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4823*67e74705SXin Li // Temporal fetch, needs to convert cache level to locality.
4824*67e74705SXin Li Locality = llvm::ConstantInt::get(Int32Ty,
4825*67e74705SXin Li -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4826*67e74705SXin Li } else {
4827*67e74705SXin Li // Streaming fetch.
4828*67e74705SXin Li Locality = llvm::ConstantInt::get(Int32Ty, 0);
4829*67e74705SXin Li }
4830*67e74705SXin Li
4831*67e74705SXin Li // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4832*67e74705SXin Li // PLDL3STRM or PLDL2STRM.
4833*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4834*67e74705SXin Li return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4835*67e74705SXin Li }
4836*67e74705SXin Li
4837*67e74705SXin Li if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4838*67e74705SXin Li assert((getContext().getTypeSize(E->getType()) == 32) &&
4839*67e74705SXin Li "rbit of unusual size!");
4840*67e74705SXin Li llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4841*67e74705SXin Li return Builder.CreateCall(
4842*67e74705SXin Li CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4843*67e74705SXin Li }
4844*67e74705SXin Li if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4845*67e74705SXin Li assert((getContext().getTypeSize(E->getType()) == 64) &&
4846*67e74705SXin Li "rbit of unusual size!");
4847*67e74705SXin Li llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4848*67e74705SXin Li return Builder.CreateCall(
4849*67e74705SXin Li CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4850*67e74705SXin Li }
4851*67e74705SXin Li
4852*67e74705SXin Li if (BuiltinID == AArch64::BI__clear_cache) {
4853*67e74705SXin Li assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4854*67e74705SXin Li const FunctionDecl *FD = E->getDirectCallee();
4855*67e74705SXin Li Value *Ops[2];
4856*67e74705SXin Li for (unsigned i = 0; i < 2; i++)
4857*67e74705SXin Li Ops[i] = EmitScalarExpr(E->getArg(i));
4858*67e74705SXin Li llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4859*67e74705SXin Li llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4860*67e74705SXin Li StringRef Name = FD->getName();
4861*67e74705SXin Li return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4862*67e74705SXin Li }
4863*67e74705SXin Li
4864*67e74705SXin Li if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4865*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4866*67e74705SXin Li getContext().getTypeSize(E->getType()) == 128) {
4867*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4868*67e74705SXin Li ? Intrinsic::aarch64_ldaxp
4869*67e74705SXin Li : Intrinsic::aarch64_ldxp);
4870*67e74705SXin Li
4871*67e74705SXin Li Value *LdPtr = EmitScalarExpr(E->getArg(0));
4872*67e74705SXin Li Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4873*67e74705SXin Li "ldxp");
4874*67e74705SXin Li
4875*67e74705SXin Li Value *Val0 = Builder.CreateExtractValue(Val, 1);
4876*67e74705SXin Li Value *Val1 = Builder.CreateExtractValue(Val, 0);
4877*67e74705SXin Li llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4878*67e74705SXin Li Val0 = Builder.CreateZExt(Val0, Int128Ty);
4879*67e74705SXin Li Val1 = Builder.CreateZExt(Val1, Int128Ty);
4880*67e74705SXin Li
4881*67e74705SXin Li Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4882*67e74705SXin Li Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4883*67e74705SXin Li Val = Builder.CreateOr(Val, Val1);
4884*67e74705SXin Li return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4885*67e74705SXin Li } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4886*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4887*67e74705SXin Li Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4888*67e74705SXin Li
4889*67e74705SXin Li QualType Ty = E->getType();
4890*67e74705SXin Li llvm::Type *RealResTy = ConvertType(Ty);
4891*67e74705SXin Li llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4892*67e74705SXin Li getContext().getTypeSize(Ty));
4893*67e74705SXin Li LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4894*67e74705SXin Li
4895*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4896*67e74705SXin Li ? Intrinsic::aarch64_ldaxr
4897*67e74705SXin Li : Intrinsic::aarch64_ldxr,
4898*67e74705SXin Li LoadAddr->getType());
4899*67e74705SXin Li Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4900*67e74705SXin Li
4901*67e74705SXin Li if (RealResTy->isPointerTy())
4902*67e74705SXin Li return Builder.CreateIntToPtr(Val, RealResTy);
4903*67e74705SXin Li
4904*67e74705SXin Li Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4905*67e74705SXin Li return Builder.CreateBitCast(Val, RealResTy);
4906*67e74705SXin Li }
4907*67e74705SXin Li
4908*67e74705SXin Li if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4909*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4910*67e74705SXin Li getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4911*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4912*67e74705SXin Li ? Intrinsic::aarch64_stlxp
4913*67e74705SXin Li : Intrinsic::aarch64_stxp);
4914*67e74705SXin Li llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4915*67e74705SXin Li
4916*67e74705SXin Li Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4917*67e74705SXin Li EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4918*67e74705SXin Li
4919*67e74705SXin Li Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4920*67e74705SXin Li llvm::Value *Val = Builder.CreateLoad(Tmp);
4921*67e74705SXin Li
4922*67e74705SXin Li Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4923*67e74705SXin Li Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4924*67e74705SXin Li Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4925*67e74705SXin Li Int8PtrTy);
4926*67e74705SXin Li return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4927*67e74705SXin Li }
4928*67e74705SXin Li
4929*67e74705SXin Li if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4930*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_stlex) {
4931*67e74705SXin Li Value *StoreVal = EmitScalarExpr(E->getArg(0));
4932*67e74705SXin Li Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4933*67e74705SXin Li
4934*67e74705SXin Li QualType Ty = E->getArg(0)->getType();
4935*67e74705SXin Li llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4936*67e74705SXin Li getContext().getTypeSize(Ty));
4937*67e74705SXin Li StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4938*67e74705SXin Li
4939*67e74705SXin Li if (StoreVal->getType()->isPointerTy())
4940*67e74705SXin Li StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4941*67e74705SXin Li else {
4942*67e74705SXin Li StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4943*67e74705SXin Li StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4944*67e74705SXin Li }
4945*67e74705SXin Li
4946*67e74705SXin Li Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4947*67e74705SXin Li ? Intrinsic::aarch64_stlxr
4948*67e74705SXin Li : Intrinsic::aarch64_stxr,
4949*67e74705SXin Li StoreAddr->getType());
4950*67e74705SXin Li return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4951*67e74705SXin Li }
4952*67e74705SXin Li
4953*67e74705SXin Li if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4954*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4955*67e74705SXin Li return Builder.CreateCall(F);
4956*67e74705SXin Li }
4957*67e74705SXin Li
4958*67e74705SXin Li // CRC32
4959*67e74705SXin Li Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4960*67e74705SXin Li switch (BuiltinID) {
4961*67e74705SXin Li case AArch64::BI__builtin_arm_crc32b:
4962*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4963*67e74705SXin Li case AArch64::BI__builtin_arm_crc32cb:
4964*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4965*67e74705SXin Li case AArch64::BI__builtin_arm_crc32h:
4966*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4967*67e74705SXin Li case AArch64::BI__builtin_arm_crc32ch:
4968*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4969*67e74705SXin Li case AArch64::BI__builtin_arm_crc32w:
4970*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4971*67e74705SXin Li case AArch64::BI__builtin_arm_crc32cw:
4972*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4973*67e74705SXin Li case AArch64::BI__builtin_arm_crc32d:
4974*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4975*67e74705SXin Li case AArch64::BI__builtin_arm_crc32cd:
4976*67e74705SXin Li CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4977*67e74705SXin Li }
4978*67e74705SXin Li
4979*67e74705SXin Li if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4980*67e74705SXin Li Value *Arg0 = EmitScalarExpr(E->getArg(0));
4981*67e74705SXin Li Value *Arg1 = EmitScalarExpr(E->getArg(1));
4982*67e74705SXin Li Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4983*67e74705SXin Li
4984*67e74705SXin Li llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4985*67e74705SXin Li Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4986*67e74705SXin Li
4987*67e74705SXin Li return Builder.CreateCall(F, {Arg0, Arg1});
4988*67e74705SXin Li }
4989*67e74705SXin Li
4990*67e74705SXin Li if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4991*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4992*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4993*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_wsr ||
4994*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4995*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4996*67e74705SXin Li
4997*67e74705SXin Li bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4998*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4999*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_rsrp;
5000*67e74705SXin Li
5001*67e74705SXin Li bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5002*67e74705SXin Li BuiltinID == AArch64::BI__builtin_arm_wsrp;
5003*67e74705SXin Li
5004*67e74705SXin Li bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5005*67e74705SXin Li BuiltinID != AArch64::BI__builtin_arm_wsr;
5006*67e74705SXin Li
5007*67e74705SXin Li llvm::Type *ValueType;
5008*67e74705SXin Li llvm::Type *RegisterType = Int64Ty;
5009*67e74705SXin Li if (IsPointerBuiltin) {
5010*67e74705SXin Li ValueType = VoidPtrTy;
5011*67e74705SXin Li } else if (Is64Bit) {
5012*67e74705SXin Li ValueType = Int64Ty;
5013*67e74705SXin Li } else {
5014*67e74705SXin Li ValueType = Int32Ty;
5015*67e74705SXin Li }
5016*67e74705SXin Li
5017*67e74705SXin Li return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5018*67e74705SXin Li }
5019*67e74705SXin Li
5020*67e74705SXin Li // Find out if any arguments are required to be integer constant
5021*67e74705SXin Li // expressions.
5022*67e74705SXin Li unsigned ICEArguments = 0;
5023*67e74705SXin Li ASTContext::GetBuiltinTypeError Error;
5024*67e74705SXin Li getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5025*67e74705SXin Li assert(Error == ASTContext::GE_None && "Should not codegen an error");
5026*67e74705SXin Li
5027*67e74705SXin Li llvm::SmallVector<Value*, 4> Ops;
5028*67e74705SXin Li for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5029*67e74705SXin Li if ((ICEArguments & (1 << i)) == 0) {
5030*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(i)));
5031*67e74705SXin Li } else {
5032*67e74705SXin Li // If this is required to be a constant, constant fold it so that we know
5033*67e74705SXin Li // that the generated intrinsic gets a ConstantInt.
5034*67e74705SXin Li llvm::APSInt Result;
5035*67e74705SXin Li bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5036*67e74705SXin Li assert(IsConst && "Constant arg isn't actually constant?");
5037*67e74705SXin Li (void)IsConst;
5038*67e74705SXin Li Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5039*67e74705SXin Li }
5040*67e74705SXin Li }
5041*67e74705SXin Li
5042*67e74705SXin Li auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5043*67e74705SXin Li const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5044*67e74705SXin Li SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5045*67e74705SXin Li
5046*67e74705SXin Li if (Builtin) {
5047*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5048*67e74705SXin Li Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5049*67e74705SXin Li assert(Result && "SISD intrinsic should have been handled");
5050*67e74705SXin Li return Result;
5051*67e74705SXin Li }
5052*67e74705SXin Li
5053*67e74705SXin Li llvm::APSInt Result;
5054*67e74705SXin Li const Expr *Arg = E->getArg(E->getNumArgs()-1);
5055*67e74705SXin Li NeonTypeFlags Type(0);
5056*67e74705SXin Li if (Arg->isIntegerConstantExpr(Result, getContext()))
5057*67e74705SXin Li // Determine the type of this overloaded NEON intrinsic.
5058*67e74705SXin Li Type = NeonTypeFlags(Result.getZExtValue());
5059*67e74705SXin Li
5060*67e74705SXin Li bool usgn = Type.isUnsigned();
5061*67e74705SXin Li bool quad = Type.isQuad();
5062*67e74705SXin Li
5063*67e74705SXin Li // Handle non-overloaded intrinsics first.
5064*67e74705SXin Li switch (BuiltinID) {
5065*67e74705SXin Li default: break;
5066*67e74705SXin Li case NEON::BI__builtin_neon_vldrq_p128: {
5067*67e74705SXin Li llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5068*67e74705SXin Li Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5069*67e74705SXin Li return Builder.CreateDefaultAlignedLoad(Ptr);
5070*67e74705SXin Li }
5071*67e74705SXin Li case NEON::BI__builtin_neon_vstrq_p128: {
5072*67e74705SXin Li llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5073*67e74705SXin Li Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5074*67e74705SXin Li return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5075*67e74705SXin Li }
5076*67e74705SXin Li case NEON::BI__builtin_neon_vcvts_u32_f32:
5077*67e74705SXin Li case NEON::BI__builtin_neon_vcvtd_u64_f64:
5078*67e74705SXin Li usgn = true;
5079*67e74705SXin Li // FALL THROUGH
5080*67e74705SXin Li case NEON::BI__builtin_neon_vcvts_s32_f32:
5081*67e74705SXin Li case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5082*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5083*67e74705SXin Li bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5084*67e74705SXin Li llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5085*67e74705SXin Li llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5086*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5087*67e74705SXin Li if (usgn)
5088*67e74705SXin Li return Builder.CreateFPToUI(Ops[0], InTy);
5089*67e74705SXin Li return Builder.CreateFPToSI(Ops[0], InTy);
5090*67e74705SXin Li }
5091*67e74705SXin Li case NEON::BI__builtin_neon_vcvts_f32_u32:
5092*67e74705SXin Li case NEON::BI__builtin_neon_vcvtd_f64_u64:
5093*67e74705SXin Li usgn = true;
5094*67e74705SXin Li // FALL THROUGH
5095*67e74705SXin Li case NEON::BI__builtin_neon_vcvts_f32_s32:
5096*67e74705SXin Li case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5097*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5098*67e74705SXin Li bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5099*67e74705SXin Li llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5100*67e74705SXin Li llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5101*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5102*67e74705SXin Li if (usgn)
5103*67e74705SXin Li return Builder.CreateUIToFP(Ops[0], FTy);
5104*67e74705SXin Li return Builder.CreateSIToFP(Ops[0], FTy);
5105*67e74705SXin Li }
5106*67e74705SXin Li case NEON::BI__builtin_neon_vpaddd_s64: {
5107*67e74705SXin Li llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5108*67e74705SXin Li Value *Vec = EmitScalarExpr(E->getArg(0));
5109*67e74705SXin Li // The vector is v2i64, so make sure it's bitcast to that.
5110*67e74705SXin Li Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5111*67e74705SXin Li llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5112*67e74705SXin Li llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5113*67e74705SXin Li Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5114*67e74705SXin Li Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5115*67e74705SXin Li // Pairwise addition of a v2i64 into a scalar i64.
5116*67e74705SXin Li return Builder.CreateAdd(Op0, Op1, "vpaddd");
5117*67e74705SXin Li }
5118*67e74705SXin Li case NEON::BI__builtin_neon_vpaddd_f64: {
5119*67e74705SXin Li llvm::Type *Ty =
5120*67e74705SXin Li llvm::VectorType::get(DoubleTy, 2);
5121*67e74705SXin Li Value *Vec = EmitScalarExpr(E->getArg(0));
5122*67e74705SXin Li // The vector is v2f64, so make sure it's bitcast to that.
5123*67e74705SXin Li Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5124*67e74705SXin Li llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5125*67e74705SXin Li llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5126*67e74705SXin Li Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5127*67e74705SXin Li Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5128*67e74705SXin Li // Pairwise addition of a v2f64 into a scalar f64.
5129*67e74705SXin Li return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5130*67e74705SXin Li }
5131*67e74705SXin Li case NEON::BI__builtin_neon_vpadds_f32: {
5132*67e74705SXin Li llvm::Type *Ty =
5133*67e74705SXin Li llvm::VectorType::get(FloatTy, 2);
5134*67e74705SXin Li Value *Vec = EmitScalarExpr(E->getArg(0));
5135*67e74705SXin Li // The vector is v2f32, so make sure it's bitcast to that.
5136*67e74705SXin Li Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5137*67e74705SXin Li llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5138*67e74705SXin Li llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5139*67e74705SXin Li Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5140*67e74705SXin Li Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5141*67e74705SXin Li // Pairwise addition of a v2f32 into a scalar f32.
5142*67e74705SXin Li return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5143*67e74705SXin Li }
5144*67e74705SXin Li case NEON::BI__builtin_neon_vceqzd_s64:
5145*67e74705SXin Li case NEON::BI__builtin_neon_vceqzd_f64:
5146*67e74705SXin Li case NEON::BI__builtin_neon_vceqzs_f32:
5147*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5148*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(
5149*67e74705SXin Li Ops[0], ConvertType(E->getCallReturnType(getContext())),
5150*67e74705SXin Li ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5151*67e74705SXin Li case NEON::BI__builtin_neon_vcgezd_s64:
5152*67e74705SXin Li case NEON::BI__builtin_neon_vcgezd_f64:
5153*67e74705SXin Li case NEON::BI__builtin_neon_vcgezs_f32:
5154*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5155*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(
5156*67e74705SXin Li Ops[0], ConvertType(E->getCallReturnType(getContext())),
5157*67e74705SXin Li ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5158*67e74705SXin Li case NEON::BI__builtin_neon_vclezd_s64:
5159*67e74705SXin Li case NEON::BI__builtin_neon_vclezd_f64:
5160*67e74705SXin Li case NEON::BI__builtin_neon_vclezs_f32:
5161*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5162*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(
5163*67e74705SXin Li Ops[0], ConvertType(E->getCallReturnType(getContext())),
5164*67e74705SXin Li ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5165*67e74705SXin Li case NEON::BI__builtin_neon_vcgtzd_s64:
5166*67e74705SXin Li case NEON::BI__builtin_neon_vcgtzd_f64:
5167*67e74705SXin Li case NEON::BI__builtin_neon_vcgtzs_f32:
5168*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5169*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(
5170*67e74705SXin Li Ops[0], ConvertType(E->getCallReturnType(getContext())),
5171*67e74705SXin Li ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5172*67e74705SXin Li case NEON::BI__builtin_neon_vcltzd_s64:
5173*67e74705SXin Li case NEON::BI__builtin_neon_vcltzd_f64:
5174*67e74705SXin Li case NEON::BI__builtin_neon_vcltzs_f32:
5175*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5176*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(
5177*67e74705SXin Li Ops[0], ConvertType(E->getCallReturnType(getContext())),
5178*67e74705SXin Li ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5179*67e74705SXin Li
5180*67e74705SXin Li case NEON::BI__builtin_neon_vceqzd_u64: {
5181*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5182*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5183*67e74705SXin Li Ops[0] =
5184*67e74705SXin Li Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5185*67e74705SXin Li return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5186*67e74705SXin Li }
5187*67e74705SXin Li case NEON::BI__builtin_neon_vceqd_f64:
5188*67e74705SXin Li case NEON::BI__builtin_neon_vcled_f64:
5189*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_f64:
5190*67e74705SXin Li case NEON::BI__builtin_neon_vcged_f64:
5191*67e74705SXin Li case NEON::BI__builtin_neon_vcgtd_f64: {
5192*67e74705SXin Li llvm::CmpInst::Predicate P;
5193*67e74705SXin Li switch (BuiltinID) {
5194*67e74705SXin Li default: llvm_unreachable("missing builtin ID in switch!");
5195*67e74705SXin Li case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5196*67e74705SXin Li case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5197*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5198*67e74705SXin Li case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5199*67e74705SXin Li case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5200*67e74705SXin Li }
5201*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5202*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5203*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5204*67e74705SXin Li Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5205*67e74705SXin Li return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5206*67e74705SXin Li }
5207*67e74705SXin Li case NEON::BI__builtin_neon_vceqs_f32:
5208*67e74705SXin Li case NEON::BI__builtin_neon_vcles_f32:
5209*67e74705SXin Li case NEON::BI__builtin_neon_vclts_f32:
5210*67e74705SXin Li case NEON::BI__builtin_neon_vcges_f32:
5211*67e74705SXin Li case NEON::BI__builtin_neon_vcgts_f32: {
5212*67e74705SXin Li llvm::CmpInst::Predicate P;
5213*67e74705SXin Li switch (BuiltinID) {
5214*67e74705SXin Li default: llvm_unreachable("missing builtin ID in switch!");
5215*67e74705SXin Li case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5216*67e74705SXin Li case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5217*67e74705SXin Li case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5218*67e74705SXin Li case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5219*67e74705SXin Li case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5220*67e74705SXin Li }
5221*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5222*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5223*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5224*67e74705SXin Li Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5225*67e74705SXin Li return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5226*67e74705SXin Li }
5227*67e74705SXin Li case NEON::BI__builtin_neon_vceqd_s64:
5228*67e74705SXin Li case NEON::BI__builtin_neon_vceqd_u64:
5229*67e74705SXin Li case NEON::BI__builtin_neon_vcgtd_s64:
5230*67e74705SXin Li case NEON::BI__builtin_neon_vcgtd_u64:
5231*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_s64:
5232*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_u64:
5233*67e74705SXin Li case NEON::BI__builtin_neon_vcged_u64:
5234*67e74705SXin Li case NEON::BI__builtin_neon_vcged_s64:
5235*67e74705SXin Li case NEON::BI__builtin_neon_vcled_u64:
5236*67e74705SXin Li case NEON::BI__builtin_neon_vcled_s64: {
5237*67e74705SXin Li llvm::CmpInst::Predicate P;
5238*67e74705SXin Li switch (BuiltinID) {
5239*67e74705SXin Li default: llvm_unreachable("missing builtin ID in switch!");
5240*67e74705SXin Li case NEON::BI__builtin_neon_vceqd_s64:
5241*67e74705SXin Li case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5242*67e74705SXin Li case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5243*67e74705SXin Li case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5244*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5245*67e74705SXin Li case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5246*67e74705SXin Li case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5247*67e74705SXin Li case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5248*67e74705SXin Li case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5249*67e74705SXin Li case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5250*67e74705SXin Li }
5251*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5252*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5253*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5254*67e74705SXin Li Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5255*67e74705SXin Li return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5256*67e74705SXin Li }
5257*67e74705SXin Li case NEON::BI__builtin_neon_vtstd_s64:
5258*67e74705SXin Li case NEON::BI__builtin_neon_vtstd_u64: {
5259*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5260*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5261*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5262*67e74705SXin Li Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5263*67e74705SXin Li Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5264*67e74705SXin Li llvm::Constant::getNullValue(Int64Ty));
5265*67e74705SXin Li return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5266*67e74705SXin Li }
5267*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i8:
5268*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i16:
5269*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i32:
5270*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_i64:
5271*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_f32:
5272*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i8:
5273*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i16:
5274*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i32:
5275*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_i64:
5276*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_f32:
5277*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(2)));
5278*67e74705SXin Li return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5279*67e74705SXin Li case NEON::BI__builtin_neon_vset_lane_f64:
5280*67e74705SXin Li // The vector type needs a cast for the v1f64 variant.
5281*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1],
5282*67e74705SXin Li llvm::VectorType::get(DoubleTy, 1));
5283*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(2)));
5284*67e74705SXin Li return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5285*67e74705SXin Li case NEON::BI__builtin_neon_vsetq_lane_f64:
5286*67e74705SXin Li // The vector type needs a cast for the v2f64 variant.
5287*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1],
5288*67e74705SXin Li llvm::VectorType::get(DoubleTy, 2));
5289*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(2)));
5290*67e74705SXin Li return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5291*67e74705SXin Li
5292*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i8:
5293*67e74705SXin Li case NEON::BI__builtin_neon_vdupb_lane_i8:
5294*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5295*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5296*67e74705SXin Li "vget_lane");
5297*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i8:
5298*67e74705SXin Li case NEON::BI__builtin_neon_vdupb_laneq_i8:
5299*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5300*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5301*67e74705SXin Li "vgetq_lane");
5302*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i16:
5303*67e74705SXin Li case NEON::BI__builtin_neon_vduph_lane_i16:
5304*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5305*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5306*67e74705SXin Li "vget_lane");
5307*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i16:
5308*67e74705SXin Li case NEON::BI__builtin_neon_vduph_laneq_i16:
5309*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5310*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5311*67e74705SXin Li "vgetq_lane");
5312*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i32:
5313*67e74705SXin Li case NEON::BI__builtin_neon_vdups_lane_i32:
5314*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5315*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5316*67e74705SXin Li "vget_lane");
5317*67e74705SXin Li case NEON::BI__builtin_neon_vdups_lane_f32:
5318*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
5319*67e74705SXin Li llvm::VectorType::get(FloatTy, 2));
5320*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5321*67e74705SXin Li "vdups_lane");
5322*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i32:
5323*67e74705SXin Li case NEON::BI__builtin_neon_vdups_laneq_i32:
5324*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5325*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5326*67e74705SXin Li "vgetq_lane");
5327*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_i64:
5328*67e74705SXin Li case NEON::BI__builtin_neon_vdupd_lane_i64:
5329*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5330*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5331*67e74705SXin Li "vget_lane");
5332*67e74705SXin Li case NEON::BI__builtin_neon_vdupd_lane_f64:
5333*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
5334*67e74705SXin Li llvm::VectorType::get(DoubleTy, 1));
5335*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5336*67e74705SXin Li "vdupd_lane");
5337*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_i64:
5338*67e74705SXin Li case NEON::BI__builtin_neon_vdupd_laneq_i64:
5339*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5340*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5341*67e74705SXin Li "vgetq_lane");
5342*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_f32:
5343*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
5344*67e74705SXin Li llvm::VectorType::get(FloatTy, 2));
5345*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5346*67e74705SXin Li "vget_lane");
5347*67e74705SXin Li case NEON::BI__builtin_neon_vget_lane_f64:
5348*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
5349*67e74705SXin Li llvm::VectorType::get(DoubleTy, 1));
5350*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5351*67e74705SXin Li "vget_lane");
5352*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_f32:
5353*67e74705SXin Li case NEON::BI__builtin_neon_vdups_laneq_f32:
5354*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
5355*67e74705SXin Li llvm::VectorType::get(FloatTy, 4));
5356*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5357*67e74705SXin Li "vgetq_lane");
5358*67e74705SXin Li case NEON::BI__builtin_neon_vgetq_lane_f64:
5359*67e74705SXin Li case NEON::BI__builtin_neon_vdupd_laneq_f64:
5360*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
5361*67e74705SXin Li llvm::VectorType::get(DoubleTy, 2));
5362*67e74705SXin Li return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5363*67e74705SXin Li "vgetq_lane");
5364*67e74705SXin Li case NEON::BI__builtin_neon_vaddd_s64:
5365*67e74705SXin Li case NEON::BI__builtin_neon_vaddd_u64:
5366*67e74705SXin Li return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5367*67e74705SXin Li case NEON::BI__builtin_neon_vsubd_s64:
5368*67e74705SXin Li case NEON::BI__builtin_neon_vsubd_u64:
5369*67e74705SXin Li return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5370*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlalh_s16:
5371*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlslh_s16: {
5372*67e74705SXin Li SmallVector<Value *, 2> ProductOps;
5373*67e74705SXin Li ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5374*67e74705SXin Li ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5375*67e74705SXin Li llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5376*67e74705SXin Li Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5377*67e74705SXin Li ProductOps, "vqdmlXl");
5378*67e74705SXin Li Constant *CI = ConstantInt::get(SizeTy, 0);
5379*67e74705SXin Li Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5380*67e74705SXin Li
5381*67e74705SXin Li unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5382*67e74705SXin Li ? Intrinsic::aarch64_neon_sqadd
5383*67e74705SXin Li : Intrinsic::aarch64_neon_sqsub;
5384*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5385*67e74705SXin Li }
5386*67e74705SXin Li case NEON::BI__builtin_neon_vqshlud_n_s64: {
5387*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5388*67e74705SXin Li Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5389*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5390*67e74705SXin Li Ops, "vqshlu_n");
5391*67e74705SXin Li }
5392*67e74705SXin Li case NEON::BI__builtin_neon_vqshld_n_u64:
5393*67e74705SXin Li case NEON::BI__builtin_neon_vqshld_n_s64: {
5394*67e74705SXin Li unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5395*67e74705SXin Li ? Intrinsic::aarch64_neon_uqshl
5396*67e74705SXin Li : Intrinsic::aarch64_neon_sqshl;
5397*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5398*67e74705SXin Li Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5399*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5400*67e74705SXin Li }
5401*67e74705SXin Li case NEON::BI__builtin_neon_vrshrd_n_u64:
5402*67e74705SXin Li case NEON::BI__builtin_neon_vrshrd_n_s64: {
5403*67e74705SXin Li unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5404*67e74705SXin Li ? Intrinsic::aarch64_neon_urshl
5405*67e74705SXin Li : Intrinsic::aarch64_neon_srshl;
5406*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5407*67e74705SXin Li int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5408*67e74705SXin Li Ops[1] = ConstantInt::get(Int64Ty, -SV);
5409*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5410*67e74705SXin Li }
5411*67e74705SXin Li case NEON::BI__builtin_neon_vrsrad_n_u64:
5412*67e74705SXin Li case NEON::BI__builtin_neon_vrsrad_n_s64: {
5413*67e74705SXin Li unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5414*67e74705SXin Li ? Intrinsic::aarch64_neon_urshl
5415*67e74705SXin Li : Intrinsic::aarch64_neon_srshl;
5416*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5417*67e74705SXin Li Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5418*67e74705SXin Li Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5419*67e74705SXin Li {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5420*67e74705SXin Li return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5421*67e74705SXin Li }
5422*67e74705SXin Li case NEON::BI__builtin_neon_vshld_n_s64:
5423*67e74705SXin Li case NEON::BI__builtin_neon_vshld_n_u64: {
5424*67e74705SXin Li llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5425*67e74705SXin Li return Builder.CreateShl(
5426*67e74705SXin Li Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5427*67e74705SXin Li }
5428*67e74705SXin Li case NEON::BI__builtin_neon_vshrd_n_s64: {
5429*67e74705SXin Li llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5430*67e74705SXin Li return Builder.CreateAShr(
5431*67e74705SXin Li Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5432*67e74705SXin Li Amt->getZExtValue())),
5433*67e74705SXin Li "shrd_n");
5434*67e74705SXin Li }
5435*67e74705SXin Li case NEON::BI__builtin_neon_vshrd_n_u64: {
5436*67e74705SXin Li llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5437*67e74705SXin Li uint64_t ShiftAmt = Amt->getZExtValue();
5438*67e74705SXin Li // Right-shifting an unsigned value by its size yields 0.
5439*67e74705SXin Li if (ShiftAmt == 64)
5440*67e74705SXin Li return ConstantInt::get(Int64Ty, 0);
5441*67e74705SXin Li return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5442*67e74705SXin Li "shrd_n");
5443*67e74705SXin Li }
5444*67e74705SXin Li case NEON::BI__builtin_neon_vsrad_n_s64: {
5445*67e74705SXin Li llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5446*67e74705SXin Li Ops[1] = Builder.CreateAShr(
5447*67e74705SXin Li Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5448*67e74705SXin Li Amt->getZExtValue())),
5449*67e74705SXin Li "shrd_n");
5450*67e74705SXin Li return Builder.CreateAdd(Ops[0], Ops[1]);
5451*67e74705SXin Li }
5452*67e74705SXin Li case NEON::BI__builtin_neon_vsrad_n_u64: {
5453*67e74705SXin Li llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5454*67e74705SXin Li uint64_t ShiftAmt = Amt->getZExtValue();
5455*67e74705SXin Li // Right-shifting an unsigned value by its size yields 0.
5456*67e74705SXin Li // As Op + 0 = Op, return Ops[0] directly.
5457*67e74705SXin Li if (ShiftAmt == 64)
5458*67e74705SXin Li return Ops[0];
5459*67e74705SXin Li Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5460*67e74705SXin Li "shrd_n");
5461*67e74705SXin Li return Builder.CreateAdd(Ops[0], Ops[1]);
5462*67e74705SXin Li }
5463*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5464*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5465*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5466*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5467*67e74705SXin Li Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5468*67e74705SXin Li "lane");
5469*67e74705SXin Li SmallVector<Value *, 2> ProductOps;
5470*67e74705SXin Li ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5471*67e74705SXin Li ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5472*67e74705SXin Li llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5473*67e74705SXin Li Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5474*67e74705SXin Li ProductOps, "vqdmlXl");
5475*67e74705SXin Li Constant *CI = ConstantInt::get(SizeTy, 0);
5476*67e74705SXin Li Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5477*67e74705SXin Li Ops.pop_back();
5478*67e74705SXin Li
5479*67e74705SXin Li unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5480*67e74705SXin Li BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5481*67e74705SXin Li ? Intrinsic::aarch64_neon_sqadd
5482*67e74705SXin Li : Intrinsic::aarch64_neon_sqsub;
5483*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5484*67e74705SXin Li }
5485*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlals_s32:
5486*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlsls_s32: {
5487*67e74705SXin Li SmallVector<Value *, 2> ProductOps;
5488*67e74705SXin Li ProductOps.push_back(Ops[1]);
5489*67e74705SXin Li ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5490*67e74705SXin Li Ops[1] =
5491*67e74705SXin Li EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5492*67e74705SXin Li ProductOps, "vqdmlXl");
5493*67e74705SXin Li
5494*67e74705SXin Li unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5495*67e74705SXin Li ? Intrinsic::aarch64_neon_sqadd
5496*67e74705SXin Li : Intrinsic::aarch64_neon_sqsub;
5497*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5498*67e74705SXin Li }
5499*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5500*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5501*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5502*67e74705SXin Li case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5503*67e74705SXin Li Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5504*67e74705SXin Li "lane");
5505*67e74705SXin Li SmallVector<Value *, 2> ProductOps;
5506*67e74705SXin Li ProductOps.push_back(Ops[1]);
5507*67e74705SXin Li ProductOps.push_back(Ops[2]);
5508*67e74705SXin Li Ops[1] =
5509*67e74705SXin Li EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5510*67e74705SXin Li ProductOps, "vqdmlXl");
5511*67e74705SXin Li Ops.pop_back();
5512*67e74705SXin Li
5513*67e74705SXin Li unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5514*67e74705SXin Li BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5515*67e74705SXin Li ? Intrinsic::aarch64_neon_sqadd
5516*67e74705SXin Li : Intrinsic::aarch64_neon_sqsub;
5517*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5518*67e74705SXin Li }
5519*67e74705SXin Li }
5520*67e74705SXin Li
5521*67e74705SXin Li llvm::VectorType *VTy = GetNeonType(this, Type);
5522*67e74705SXin Li llvm::Type *Ty = VTy;
5523*67e74705SXin Li if (!Ty)
5524*67e74705SXin Li return nullptr;
5525*67e74705SXin Li
5526*67e74705SXin Li // Not all intrinsics handled by the common case work for AArch64 yet, so only
5527*67e74705SXin Li // defer to common code if it's been added to our special map.
5528*67e74705SXin Li Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5529*67e74705SXin Li AArch64SIMDIntrinsicsProvenSorted);
5530*67e74705SXin Li
5531*67e74705SXin Li if (Builtin)
5532*67e74705SXin Li return EmitCommonNeonBuiltinExpr(
5533*67e74705SXin Li Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5534*67e74705SXin Li Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5535*67e74705SXin Li /*never use addresses*/ Address::invalid(), Address::invalid());
5536*67e74705SXin Li
5537*67e74705SXin Li if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5538*67e74705SXin Li return V;
5539*67e74705SXin Li
5540*67e74705SXin Li unsigned Int;
5541*67e74705SXin Li switch (BuiltinID) {
5542*67e74705SXin Li default: return nullptr;
5543*67e74705SXin Li case NEON::BI__builtin_neon_vbsl_v:
5544*67e74705SXin Li case NEON::BI__builtin_neon_vbslq_v: {
5545*67e74705SXin Li llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5546*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5547*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5548*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5549*67e74705SXin Li
5550*67e74705SXin Li Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5551*67e74705SXin Li Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5552*67e74705SXin Li Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5553*67e74705SXin Li return Builder.CreateBitCast(Ops[0], Ty);
5554*67e74705SXin Li }
5555*67e74705SXin Li case NEON::BI__builtin_neon_vfma_lane_v:
5556*67e74705SXin Li case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5557*67e74705SXin Li // The ARM builtins (and instructions) have the addend as the first
5558*67e74705SXin Li // operand, but the 'fma' intrinsics have it last. Swap it around here.
5559*67e74705SXin Li Value *Addend = Ops[0];
5560*67e74705SXin Li Value *Multiplicand = Ops[1];
5561*67e74705SXin Li Value *LaneSource = Ops[2];
5562*67e74705SXin Li Ops[0] = Multiplicand;
5563*67e74705SXin Li Ops[1] = LaneSource;
5564*67e74705SXin Li Ops[2] = Addend;
5565*67e74705SXin Li
5566*67e74705SXin Li // Now adjust things to handle the lane access.
5567*67e74705SXin Li llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5568*67e74705SXin Li llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5569*67e74705SXin Li VTy;
5570*67e74705SXin Li llvm::Constant *cst = cast<Constant>(Ops[3]);
5571*67e74705SXin Li Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5572*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5573*67e74705SXin Li Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5574*67e74705SXin Li
5575*67e74705SXin Li Ops.pop_back();
5576*67e74705SXin Li Int = Intrinsic::fma;
5577*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5578*67e74705SXin Li }
5579*67e74705SXin Li case NEON::BI__builtin_neon_vfma_laneq_v: {
5580*67e74705SXin Li llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5581*67e74705SXin Li // v1f64 fma should be mapped to Neon scalar f64 fma
5582*67e74705SXin Li if (VTy && VTy->getElementType() == DoubleTy) {
5583*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5584*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5585*67e74705SXin Li llvm::Type *VTy = GetNeonType(this,
5586*67e74705SXin Li NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5587*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5588*67e74705SXin Li Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5589*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5590*67e74705SXin Li Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5591*67e74705SXin Li return Builder.CreateBitCast(Result, Ty);
5592*67e74705SXin Li }
5593*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5594*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5595*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5596*67e74705SXin Li
5597*67e74705SXin Li llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5598*67e74705SXin Li VTy->getNumElements() * 2);
5599*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5600*67e74705SXin Li Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5601*67e74705SXin Li cast<ConstantInt>(Ops[3]));
5602*67e74705SXin Li Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5603*67e74705SXin Li
5604*67e74705SXin Li return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5605*67e74705SXin Li }
5606*67e74705SXin Li case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5607*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5608*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5609*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5610*67e74705SXin Li
5611*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5612*67e74705SXin Li Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5613*67e74705SXin Li return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5614*67e74705SXin Li }
5615*67e74705SXin Li case NEON::BI__builtin_neon_vfmas_lane_f32:
5616*67e74705SXin Li case NEON::BI__builtin_neon_vfmas_laneq_f32:
5617*67e74705SXin Li case NEON::BI__builtin_neon_vfmad_lane_f64:
5618*67e74705SXin Li case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5619*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(3)));
5620*67e74705SXin Li llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5621*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5622*67e74705SXin Li Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5623*67e74705SXin Li return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5624*67e74705SXin Li }
5625*67e74705SXin Li case NEON::BI__builtin_neon_vmull_v:
5626*67e74705SXin Li // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5627*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5628*67e74705SXin Li if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5629*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5630*67e74705SXin Li case NEON::BI__builtin_neon_vmax_v:
5631*67e74705SXin Li case NEON::BI__builtin_neon_vmaxq_v:
5632*67e74705SXin Li // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5633*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5634*67e74705SXin Li if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5635*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5636*67e74705SXin Li case NEON::BI__builtin_neon_vmin_v:
5637*67e74705SXin Li case NEON::BI__builtin_neon_vminq_v:
5638*67e74705SXin Li // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5639*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5640*67e74705SXin Li if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5641*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5642*67e74705SXin Li case NEON::BI__builtin_neon_vabd_v:
5643*67e74705SXin Li case NEON::BI__builtin_neon_vabdq_v:
5644*67e74705SXin Li // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5645*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5646*67e74705SXin Li if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5647*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5648*67e74705SXin Li case NEON::BI__builtin_neon_vpadal_v:
5649*67e74705SXin Li case NEON::BI__builtin_neon_vpadalq_v: {
5650*67e74705SXin Li unsigned ArgElts = VTy->getNumElements();
5651*67e74705SXin Li llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5652*67e74705SXin Li unsigned BitWidth = EltTy->getBitWidth();
5653*67e74705SXin Li llvm::Type *ArgTy = llvm::VectorType::get(
5654*67e74705SXin Li llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5655*67e74705SXin Li llvm::Type* Tys[2] = { VTy, ArgTy };
5656*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5657*67e74705SXin Li SmallVector<llvm::Value*, 1> TmpOps;
5658*67e74705SXin Li TmpOps.push_back(Ops[1]);
5659*67e74705SXin Li Function *F = CGM.getIntrinsic(Int, Tys);
5660*67e74705SXin Li llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5661*67e74705SXin Li llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5662*67e74705SXin Li return Builder.CreateAdd(tmp, addend);
5663*67e74705SXin Li }
5664*67e74705SXin Li case NEON::BI__builtin_neon_vpmin_v:
5665*67e74705SXin Li case NEON::BI__builtin_neon_vpminq_v:
5666*67e74705SXin Li // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5667*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5668*67e74705SXin Li if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5669*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5670*67e74705SXin Li case NEON::BI__builtin_neon_vpmax_v:
5671*67e74705SXin Li case NEON::BI__builtin_neon_vpmaxq_v:
5672*67e74705SXin Li // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5673*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5674*67e74705SXin Li if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5675*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5676*67e74705SXin Li case NEON::BI__builtin_neon_vminnm_v:
5677*67e74705SXin Li case NEON::BI__builtin_neon_vminnmq_v:
5678*67e74705SXin Li Int = Intrinsic::aarch64_neon_fminnm;
5679*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5680*67e74705SXin Li case NEON::BI__builtin_neon_vmaxnm_v:
5681*67e74705SXin Li case NEON::BI__builtin_neon_vmaxnmq_v:
5682*67e74705SXin Li Int = Intrinsic::aarch64_neon_fmaxnm;
5683*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5684*67e74705SXin Li case NEON::BI__builtin_neon_vrecpss_f32: {
5685*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5686*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5687*67e74705SXin Li Ops, "vrecps");
5688*67e74705SXin Li }
5689*67e74705SXin Li case NEON::BI__builtin_neon_vrecpsd_f64: {
5690*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(1)));
5691*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5692*67e74705SXin Li Ops, "vrecps");
5693*67e74705SXin Li }
5694*67e74705SXin Li case NEON::BI__builtin_neon_vqshrun_n_v:
5695*67e74705SXin Li Int = Intrinsic::aarch64_neon_sqshrun;
5696*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5697*67e74705SXin Li case NEON::BI__builtin_neon_vqrshrun_n_v:
5698*67e74705SXin Li Int = Intrinsic::aarch64_neon_sqrshrun;
5699*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5700*67e74705SXin Li case NEON::BI__builtin_neon_vqshrn_n_v:
5701*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5702*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5703*67e74705SXin Li case NEON::BI__builtin_neon_vrshrn_n_v:
5704*67e74705SXin Li Int = Intrinsic::aarch64_neon_rshrn;
5705*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5706*67e74705SXin Li case NEON::BI__builtin_neon_vqrshrn_n_v:
5707*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5708*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5709*67e74705SXin Li case NEON::BI__builtin_neon_vrnda_v:
5710*67e74705SXin Li case NEON::BI__builtin_neon_vrndaq_v: {
5711*67e74705SXin Li Int = Intrinsic::round;
5712*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5713*67e74705SXin Li }
5714*67e74705SXin Li case NEON::BI__builtin_neon_vrndi_v:
5715*67e74705SXin Li case NEON::BI__builtin_neon_vrndiq_v: {
5716*67e74705SXin Li Int = Intrinsic::nearbyint;
5717*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5718*67e74705SXin Li }
5719*67e74705SXin Li case NEON::BI__builtin_neon_vrndm_v:
5720*67e74705SXin Li case NEON::BI__builtin_neon_vrndmq_v: {
5721*67e74705SXin Li Int = Intrinsic::floor;
5722*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5723*67e74705SXin Li }
5724*67e74705SXin Li case NEON::BI__builtin_neon_vrndn_v:
5725*67e74705SXin Li case NEON::BI__builtin_neon_vrndnq_v: {
5726*67e74705SXin Li Int = Intrinsic::aarch64_neon_frintn;
5727*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5728*67e74705SXin Li }
5729*67e74705SXin Li case NEON::BI__builtin_neon_vrndp_v:
5730*67e74705SXin Li case NEON::BI__builtin_neon_vrndpq_v: {
5731*67e74705SXin Li Int = Intrinsic::ceil;
5732*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5733*67e74705SXin Li }
5734*67e74705SXin Li case NEON::BI__builtin_neon_vrndx_v:
5735*67e74705SXin Li case NEON::BI__builtin_neon_vrndxq_v: {
5736*67e74705SXin Li Int = Intrinsic::rint;
5737*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5738*67e74705SXin Li }
5739*67e74705SXin Li case NEON::BI__builtin_neon_vrnd_v:
5740*67e74705SXin Li case NEON::BI__builtin_neon_vrndq_v: {
5741*67e74705SXin Li Int = Intrinsic::trunc;
5742*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5743*67e74705SXin Li }
5744*67e74705SXin Li case NEON::BI__builtin_neon_vceqz_v:
5745*67e74705SXin Li case NEON::BI__builtin_neon_vceqzq_v:
5746*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5747*67e74705SXin Li ICmpInst::ICMP_EQ, "vceqz");
5748*67e74705SXin Li case NEON::BI__builtin_neon_vcgez_v:
5749*67e74705SXin Li case NEON::BI__builtin_neon_vcgezq_v:
5750*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5751*67e74705SXin Li ICmpInst::ICMP_SGE, "vcgez");
5752*67e74705SXin Li case NEON::BI__builtin_neon_vclez_v:
5753*67e74705SXin Li case NEON::BI__builtin_neon_vclezq_v:
5754*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5755*67e74705SXin Li ICmpInst::ICMP_SLE, "vclez");
5756*67e74705SXin Li case NEON::BI__builtin_neon_vcgtz_v:
5757*67e74705SXin Li case NEON::BI__builtin_neon_vcgtzq_v:
5758*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5759*67e74705SXin Li ICmpInst::ICMP_SGT, "vcgtz");
5760*67e74705SXin Li case NEON::BI__builtin_neon_vcltz_v:
5761*67e74705SXin Li case NEON::BI__builtin_neon_vcltzq_v:
5762*67e74705SXin Li return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5763*67e74705SXin Li ICmpInst::ICMP_SLT, "vcltz");
5764*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_f64_v:
5765*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_f64_v:
5766*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5767*67e74705SXin Li Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5768*67e74705SXin Li return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5769*67e74705SXin Li : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5770*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_f64_f32: {
5771*67e74705SXin Li assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5772*67e74705SXin Li "unexpected vcvt_f64_f32 builtin");
5773*67e74705SXin Li NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5774*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5775*67e74705SXin Li
5776*67e74705SXin Li return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5777*67e74705SXin Li }
5778*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_f32_f64: {
5779*67e74705SXin Li assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5780*67e74705SXin Li "unexpected vcvt_f32_f64 builtin");
5781*67e74705SXin Li NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5782*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5783*67e74705SXin Li
5784*67e74705SXin Li return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5785*67e74705SXin Li }
5786*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_s32_v:
5787*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_u32_v:
5788*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_s64_v:
5789*67e74705SXin Li case NEON::BI__builtin_neon_vcvt_u64_v:
5790*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_s32_v:
5791*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_u32_v:
5792*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_s64_v:
5793*67e74705SXin Li case NEON::BI__builtin_neon_vcvtq_u64_v: {
5794*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5795*67e74705SXin Li if (usgn)
5796*67e74705SXin Li return Builder.CreateFPToUI(Ops[0], Ty);
5797*67e74705SXin Li return Builder.CreateFPToSI(Ops[0], Ty);
5798*67e74705SXin Li }
5799*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_s32_v:
5800*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_s32_v:
5801*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_u32_v:
5802*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_u32_v:
5803*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_s64_v:
5804*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_s64_v:
5805*67e74705SXin Li case NEON::BI__builtin_neon_vcvta_u64_v:
5806*67e74705SXin Li case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5807*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5808*67e74705SXin Li llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5809*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5810*67e74705SXin Li }
5811*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_s32_v:
5812*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_s32_v:
5813*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_u32_v:
5814*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_u32_v:
5815*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_s64_v:
5816*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_s64_v:
5817*67e74705SXin Li case NEON::BI__builtin_neon_vcvtm_u64_v:
5818*67e74705SXin Li case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5819*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5820*67e74705SXin Li llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5821*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5822*67e74705SXin Li }
5823*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_s32_v:
5824*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_s32_v:
5825*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_u32_v:
5826*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_u32_v:
5827*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_s64_v:
5828*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_s64_v:
5829*67e74705SXin Li case NEON::BI__builtin_neon_vcvtn_u64_v:
5830*67e74705SXin Li case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5831*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5832*67e74705SXin Li llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5833*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5834*67e74705SXin Li }
5835*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_s32_v:
5836*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_s32_v:
5837*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_u32_v:
5838*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_u32_v:
5839*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_s64_v:
5840*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_s64_v:
5841*67e74705SXin Li case NEON::BI__builtin_neon_vcvtp_u64_v:
5842*67e74705SXin Li case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5843*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5844*67e74705SXin Li llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5845*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5846*67e74705SXin Li }
5847*67e74705SXin Li case NEON::BI__builtin_neon_vmulx_v:
5848*67e74705SXin Li case NEON::BI__builtin_neon_vmulxq_v: {
5849*67e74705SXin Li Int = Intrinsic::aarch64_neon_fmulx;
5850*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5851*67e74705SXin Li }
5852*67e74705SXin Li case NEON::BI__builtin_neon_vmul_lane_v:
5853*67e74705SXin Li case NEON::BI__builtin_neon_vmul_laneq_v: {
5854*67e74705SXin Li // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5855*67e74705SXin Li bool Quad = false;
5856*67e74705SXin Li if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5857*67e74705SXin Li Quad = true;
5858*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5859*67e74705SXin Li llvm::Type *VTy = GetNeonType(this,
5860*67e74705SXin Li NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5861*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5862*67e74705SXin Li Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5863*67e74705SXin Li Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5864*67e74705SXin Li return Builder.CreateBitCast(Result, Ty);
5865*67e74705SXin Li }
5866*67e74705SXin Li case NEON::BI__builtin_neon_vnegd_s64:
5867*67e74705SXin Li return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5868*67e74705SXin Li case NEON::BI__builtin_neon_vpmaxnm_v:
5869*67e74705SXin Li case NEON::BI__builtin_neon_vpmaxnmq_v: {
5870*67e74705SXin Li Int = Intrinsic::aarch64_neon_fmaxnmp;
5871*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5872*67e74705SXin Li }
5873*67e74705SXin Li case NEON::BI__builtin_neon_vpminnm_v:
5874*67e74705SXin Li case NEON::BI__builtin_neon_vpminnmq_v: {
5875*67e74705SXin Li Int = Intrinsic::aarch64_neon_fminnmp;
5876*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5877*67e74705SXin Li }
5878*67e74705SXin Li case NEON::BI__builtin_neon_vsqrt_v:
5879*67e74705SXin Li case NEON::BI__builtin_neon_vsqrtq_v: {
5880*67e74705SXin Li Int = Intrinsic::sqrt;
5881*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5882*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5883*67e74705SXin Li }
5884*67e74705SXin Li case NEON::BI__builtin_neon_vrbit_v:
5885*67e74705SXin Li case NEON::BI__builtin_neon_vrbitq_v: {
5886*67e74705SXin Li Int = Intrinsic::aarch64_neon_rbit;
5887*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5888*67e74705SXin Li }
5889*67e74705SXin Li case NEON::BI__builtin_neon_vaddv_u8:
5890*67e74705SXin Li // FIXME: These are handled by the AArch64 scalar code.
5891*67e74705SXin Li usgn = true;
5892*67e74705SXin Li // FALLTHROUGH
5893*67e74705SXin Li case NEON::BI__builtin_neon_vaddv_s8: {
5894*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5895*67e74705SXin Li Ty = Int32Ty;
5896*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
5897*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5898*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5899*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5900*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
5901*67e74705SXin Li }
5902*67e74705SXin Li case NEON::BI__builtin_neon_vaddv_u16:
5903*67e74705SXin Li usgn = true;
5904*67e74705SXin Li // FALLTHROUGH
5905*67e74705SXin Li case NEON::BI__builtin_neon_vaddv_s16: {
5906*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5907*67e74705SXin Li Ty = Int32Ty;
5908*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
5909*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5910*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5911*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5912*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
5913*67e74705SXin Li }
5914*67e74705SXin Li case NEON::BI__builtin_neon_vaddvq_u8:
5915*67e74705SXin Li usgn = true;
5916*67e74705SXin Li // FALLTHROUGH
5917*67e74705SXin Li case NEON::BI__builtin_neon_vaddvq_s8: {
5918*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5919*67e74705SXin Li Ty = Int32Ty;
5920*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
5921*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5922*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5923*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5924*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
5925*67e74705SXin Li }
5926*67e74705SXin Li case NEON::BI__builtin_neon_vaddvq_u16:
5927*67e74705SXin Li usgn = true;
5928*67e74705SXin Li // FALLTHROUGH
5929*67e74705SXin Li case NEON::BI__builtin_neon_vaddvq_s16: {
5930*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5931*67e74705SXin Li Ty = Int32Ty;
5932*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
5933*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5934*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5935*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5936*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
5937*67e74705SXin Li }
5938*67e74705SXin Li case NEON::BI__builtin_neon_vmaxv_u8: {
5939*67e74705SXin Li Int = Intrinsic::aarch64_neon_umaxv;
5940*67e74705SXin Li Ty = Int32Ty;
5941*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
5942*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5943*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5944*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5945*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
5946*67e74705SXin Li }
5947*67e74705SXin Li case NEON::BI__builtin_neon_vmaxv_u16: {
5948*67e74705SXin Li Int = Intrinsic::aarch64_neon_umaxv;
5949*67e74705SXin Li Ty = Int32Ty;
5950*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
5951*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5952*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5953*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5954*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
5955*67e74705SXin Li }
5956*67e74705SXin Li case NEON::BI__builtin_neon_vmaxvq_u8: {
5957*67e74705SXin Li Int = Intrinsic::aarch64_neon_umaxv;
5958*67e74705SXin Li Ty = Int32Ty;
5959*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
5960*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5961*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5962*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5963*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
5964*67e74705SXin Li }
5965*67e74705SXin Li case NEON::BI__builtin_neon_vmaxvq_u16: {
5966*67e74705SXin Li Int = Intrinsic::aarch64_neon_umaxv;
5967*67e74705SXin Li Ty = Int32Ty;
5968*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
5969*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5970*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5971*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5972*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
5973*67e74705SXin Li }
5974*67e74705SXin Li case NEON::BI__builtin_neon_vmaxv_s8: {
5975*67e74705SXin Li Int = Intrinsic::aarch64_neon_smaxv;
5976*67e74705SXin Li Ty = Int32Ty;
5977*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
5978*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5979*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5980*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5981*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
5982*67e74705SXin Li }
5983*67e74705SXin Li case NEON::BI__builtin_neon_vmaxv_s16: {
5984*67e74705SXin Li Int = Intrinsic::aarch64_neon_smaxv;
5985*67e74705SXin Li Ty = Int32Ty;
5986*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
5987*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5988*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5989*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5990*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
5991*67e74705SXin Li }
5992*67e74705SXin Li case NEON::BI__builtin_neon_vmaxvq_s8: {
5993*67e74705SXin Li Int = Intrinsic::aarch64_neon_smaxv;
5994*67e74705SXin Li Ty = Int32Ty;
5995*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
5996*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
5997*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
5998*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5999*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
6000*67e74705SXin Li }
6001*67e74705SXin Li case NEON::BI__builtin_neon_vmaxvq_s16: {
6002*67e74705SXin Li Int = Intrinsic::aarch64_neon_smaxv;
6003*67e74705SXin Li Ty = Int32Ty;
6004*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
6005*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6006*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6007*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6008*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6009*67e74705SXin Li }
6010*67e74705SXin Li case NEON::BI__builtin_neon_vminv_u8: {
6011*67e74705SXin Li Int = Intrinsic::aarch64_neon_uminv;
6012*67e74705SXin Li Ty = Int32Ty;
6013*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
6014*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6015*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6016*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6017*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
6018*67e74705SXin Li }
6019*67e74705SXin Li case NEON::BI__builtin_neon_vminv_u16: {
6020*67e74705SXin Li Int = Intrinsic::aarch64_neon_uminv;
6021*67e74705SXin Li Ty = Int32Ty;
6022*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
6023*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6024*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6025*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6026*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6027*67e74705SXin Li }
6028*67e74705SXin Li case NEON::BI__builtin_neon_vminvq_u8: {
6029*67e74705SXin Li Int = Intrinsic::aarch64_neon_uminv;
6030*67e74705SXin Li Ty = Int32Ty;
6031*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
6032*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6033*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6034*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6035*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
6036*67e74705SXin Li }
6037*67e74705SXin Li case NEON::BI__builtin_neon_vminvq_u16: {
6038*67e74705SXin Li Int = Intrinsic::aarch64_neon_uminv;
6039*67e74705SXin Li Ty = Int32Ty;
6040*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
6041*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6042*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6043*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6044*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6045*67e74705SXin Li }
6046*67e74705SXin Li case NEON::BI__builtin_neon_vminv_s8: {
6047*67e74705SXin Li Int = Intrinsic::aarch64_neon_sminv;
6048*67e74705SXin Li Ty = Int32Ty;
6049*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
6050*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6051*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6052*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6053*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
6054*67e74705SXin Li }
6055*67e74705SXin Li case NEON::BI__builtin_neon_vminv_s16: {
6056*67e74705SXin Li Int = Intrinsic::aarch64_neon_sminv;
6057*67e74705SXin Li Ty = Int32Ty;
6058*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
6059*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6060*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6061*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6062*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6063*67e74705SXin Li }
6064*67e74705SXin Li case NEON::BI__builtin_neon_vminvq_s8: {
6065*67e74705SXin Li Int = Intrinsic::aarch64_neon_sminv;
6066*67e74705SXin Li Ty = Int32Ty;
6067*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
6068*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6069*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6070*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6071*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int8Ty);
6072*67e74705SXin Li }
6073*67e74705SXin Li case NEON::BI__builtin_neon_vminvq_s16: {
6074*67e74705SXin Li Int = Intrinsic::aarch64_neon_sminv;
6075*67e74705SXin Li Ty = Int32Ty;
6076*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
6077*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6078*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6079*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6080*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6081*67e74705SXin Li }
6082*67e74705SXin Li case NEON::BI__builtin_neon_vmul_n_f64: {
6083*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6084*67e74705SXin Li Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6085*67e74705SXin Li return Builder.CreateFMul(Ops[0], RHS);
6086*67e74705SXin Li }
6087*67e74705SXin Li case NEON::BI__builtin_neon_vaddlv_u8: {
6088*67e74705SXin Li Int = Intrinsic::aarch64_neon_uaddlv;
6089*67e74705SXin Li Ty = Int32Ty;
6090*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
6091*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6092*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6093*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6094*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6095*67e74705SXin Li }
6096*67e74705SXin Li case NEON::BI__builtin_neon_vaddlv_u16: {
6097*67e74705SXin Li Int = Intrinsic::aarch64_neon_uaddlv;
6098*67e74705SXin Li Ty = Int32Ty;
6099*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
6100*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6101*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6102*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6103*67e74705SXin Li }
6104*67e74705SXin Li case NEON::BI__builtin_neon_vaddlvq_u8: {
6105*67e74705SXin Li Int = Intrinsic::aarch64_neon_uaddlv;
6106*67e74705SXin Li Ty = Int32Ty;
6107*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
6108*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6109*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6110*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6111*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6112*67e74705SXin Li }
6113*67e74705SXin Li case NEON::BI__builtin_neon_vaddlvq_u16: {
6114*67e74705SXin Li Int = Intrinsic::aarch64_neon_uaddlv;
6115*67e74705SXin Li Ty = Int32Ty;
6116*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
6117*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6118*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6119*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6120*67e74705SXin Li }
6121*67e74705SXin Li case NEON::BI__builtin_neon_vaddlv_s8: {
6122*67e74705SXin Li Int = Intrinsic::aarch64_neon_saddlv;
6123*67e74705SXin Li Ty = Int32Ty;
6124*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 8);
6125*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6126*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6127*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6128*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6129*67e74705SXin Li }
6130*67e74705SXin Li case NEON::BI__builtin_neon_vaddlv_s16: {
6131*67e74705SXin Li Int = Intrinsic::aarch64_neon_saddlv;
6132*67e74705SXin Li Ty = Int32Ty;
6133*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 4);
6134*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6135*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6136*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6137*67e74705SXin Li }
6138*67e74705SXin Li case NEON::BI__builtin_neon_vaddlvq_s8: {
6139*67e74705SXin Li Int = Intrinsic::aarch64_neon_saddlv;
6140*67e74705SXin Li Ty = Int32Ty;
6141*67e74705SXin Li VTy = llvm::VectorType::get(Int8Ty, 16);
6142*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6143*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6144*67e74705SXin Li Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6145*67e74705SXin Li return Builder.CreateTrunc(Ops[0], Int16Ty);
6146*67e74705SXin Li }
6147*67e74705SXin Li case NEON::BI__builtin_neon_vaddlvq_s16: {
6148*67e74705SXin Li Int = Intrinsic::aarch64_neon_saddlv;
6149*67e74705SXin Li Ty = Int32Ty;
6150*67e74705SXin Li VTy = llvm::VectorType::get(Int16Ty, 8);
6151*67e74705SXin Li llvm::Type *Tys[2] = { Ty, VTy };
6152*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(0)));
6153*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6154*67e74705SXin Li }
6155*67e74705SXin Li case NEON::BI__builtin_neon_vsri_n_v:
6156*67e74705SXin Li case NEON::BI__builtin_neon_vsriq_n_v: {
6157*67e74705SXin Li Int = Intrinsic::aarch64_neon_vsri;
6158*67e74705SXin Li llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6159*67e74705SXin Li return EmitNeonCall(Intrin, Ops, "vsri_n");
6160*67e74705SXin Li }
6161*67e74705SXin Li case NEON::BI__builtin_neon_vsli_n_v:
6162*67e74705SXin Li case NEON::BI__builtin_neon_vsliq_n_v: {
6163*67e74705SXin Li Int = Intrinsic::aarch64_neon_vsli;
6164*67e74705SXin Li llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6165*67e74705SXin Li return EmitNeonCall(Intrin, Ops, "vsli_n");
6166*67e74705SXin Li }
6167*67e74705SXin Li case NEON::BI__builtin_neon_vsra_n_v:
6168*67e74705SXin Li case NEON::BI__builtin_neon_vsraq_n_v:
6169*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6170*67e74705SXin Li Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6171*67e74705SXin Li return Builder.CreateAdd(Ops[0], Ops[1]);
6172*67e74705SXin Li case NEON::BI__builtin_neon_vrsra_n_v:
6173*67e74705SXin Li case NEON::BI__builtin_neon_vrsraq_n_v: {
6174*67e74705SXin Li Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6175*67e74705SXin Li SmallVector<llvm::Value*,2> TmpOps;
6176*67e74705SXin Li TmpOps.push_back(Ops[1]);
6177*67e74705SXin Li TmpOps.push_back(Ops[2]);
6178*67e74705SXin Li Function* F = CGM.getIntrinsic(Int, Ty);
6179*67e74705SXin Li llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6180*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6181*67e74705SXin Li return Builder.CreateAdd(Ops[0], tmp);
6182*67e74705SXin Li }
6183*67e74705SXin Li // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6184*67e74705SXin Li // of an Align parameter here.
6185*67e74705SXin Li case NEON::BI__builtin_neon_vld1_x2_v:
6186*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_x2_v:
6187*67e74705SXin Li case NEON::BI__builtin_neon_vld1_x3_v:
6188*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_x3_v:
6189*67e74705SXin Li case NEON::BI__builtin_neon_vld1_x4_v:
6190*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_x4_v: {
6191*67e74705SXin Li llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6192*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6193*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6194*67e74705SXin Li unsigned Int;
6195*67e74705SXin Li switch (BuiltinID) {
6196*67e74705SXin Li case NEON::BI__builtin_neon_vld1_x2_v:
6197*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_x2_v:
6198*67e74705SXin Li Int = Intrinsic::aarch64_neon_ld1x2;
6199*67e74705SXin Li break;
6200*67e74705SXin Li case NEON::BI__builtin_neon_vld1_x3_v:
6201*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_x3_v:
6202*67e74705SXin Li Int = Intrinsic::aarch64_neon_ld1x3;
6203*67e74705SXin Li break;
6204*67e74705SXin Li case NEON::BI__builtin_neon_vld1_x4_v:
6205*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_x4_v:
6206*67e74705SXin Li Int = Intrinsic::aarch64_neon_ld1x4;
6207*67e74705SXin Li break;
6208*67e74705SXin Li }
6209*67e74705SXin Li Function *F = CGM.getIntrinsic(Int, Tys);
6210*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6211*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6212*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6213*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6214*67e74705SXin Li }
6215*67e74705SXin Li case NEON::BI__builtin_neon_vst1_x2_v:
6216*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_x2_v:
6217*67e74705SXin Li case NEON::BI__builtin_neon_vst1_x3_v:
6218*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_x3_v:
6219*67e74705SXin Li case NEON::BI__builtin_neon_vst1_x4_v:
6220*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_x4_v: {
6221*67e74705SXin Li llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6222*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6223*67e74705SXin Li unsigned Int;
6224*67e74705SXin Li switch (BuiltinID) {
6225*67e74705SXin Li case NEON::BI__builtin_neon_vst1_x2_v:
6226*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_x2_v:
6227*67e74705SXin Li Int = Intrinsic::aarch64_neon_st1x2;
6228*67e74705SXin Li break;
6229*67e74705SXin Li case NEON::BI__builtin_neon_vst1_x3_v:
6230*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_x3_v:
6231*67e74705SXin Li Int = Intrinsic::aarch64_neon_st1x3;
6232*67e74705SXin Li break;
6233*67e74705SXin Li case NEON::BI__builtin_neon_vst1_x4_v:
6234*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_x4_v:
6235*67e74705SXin Li Int = Intrinsic::aarch64_neon_st1x4;
6236*67e74705SXin Li break;
6237*67e74705SXin Li }
6238*67e74705SXin Li std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6239*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6240*67e74705SXin Li }
6241*67e74705SXin Li case NEON::BI__builtin_neon_vld1_v:
6242*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_v:
6243*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6244*67e74705SXin Li return Builder.CreateDefaultAlignedLoad(Ops[0]);
6245*67e74705SXin Li case NEON::BI__builtin_neon_vst1_v:
6246*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_v:
6247*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6248*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6249*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6250*67e74705SXin Li case NEON::BI__builtin_neon_vld1_lane_v:
6251*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_lane_v:
6252*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6253*67e74705SXin Li Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6254*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6255*67e74705SXin Li Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6256*67e74705SXin Li return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6257*67e74705SXin Li case NEON::BI__builtin_neon_vld1_dup_v:
6258*67e74705SXin Li case NEON::BI__builtin_neon_vld1q_dup_v: {
6259*67e74705SXin Li Value *V = UndefValue::get(Ty);
6260*67e74705SXin Li Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6261*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6262*67e74705SXin Li Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6263*67e74705SXin Li llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6264*67e74705SXin Li Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6265*67e74705SXin Li return EmitNeonSplat(Ops[0], CI);
6266*67e74705SXin Li }
6267*67e74705SXin Li case NEON::BI__builtin_neon_vst1_lane_v:
6268*67e74705SXin Li case NEON::BI__builtin_neon_vst1q_lane_v:
6269*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6270*67e74705SXin Li Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6271*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6272*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1],
6273*67e74705SXin Li Builder.CreateBitCast(Ops[0], Ty));
6274*67e74705SXin Li case NEON::BI__builtin_neon_vld2_v:
6275*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_v: {
6276*67e74705SXin Li llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6277*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6278*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6279*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6280*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6281*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
6282*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6283*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6284*67e74705SXin Li }
6285*67e74705SXin Li case NEON::BI__builtin_neon_vld3_v:
6286*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_v: {
6287*67e74705SXin Li llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6288*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6289*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6290*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6291*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6292*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
6293*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6294*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6295*67e74705SXin Li }
6296*67e74705SXin Li case NEON::BI__builtin_neon_vld4_v:
6297*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_v: {
6298*67e74705SXin Li llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6299*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6300*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6301*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6302*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6303*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
6304*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6305*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6306*67e74705SXin Li }
6307*67e74705SXin Li case NEON::BI__builtin_neon_vld2_dup_v:
6308*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_dup_v: {
6309*67e74705SXin Li llvm::Type *PTy =
6310*67e74705SXin Li llvm::PointerType::getUnqual(VTy->getElementType());
6311*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6312*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6313*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6314*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6315*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
6316*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6317*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6318*67e74705SXin Li }
6319*67e74705SXin Li case NEON::BI__builtin_neon_vld3_dup_v:
6320*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_dup_v: {
6321*67e74705SXin Li llvm::Type *PTy =
6322*67e74705SXin Li llvm::PointerType::getUnqual(VTy->getElementType());
6323*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6324*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6325*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6326*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6327*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
6328*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6329*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6330*67e74705SXin Li }
6331*67e74705SXin Li case NEON::BI__builtin_neon_vld4_dup_v:
6332*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_dup_v: {
6333*67e74705SXin Li llvm::Type *PTy =
6334*67e74705SXin Li llvm::PointerType::getUnqual(VTy->getElementType());
6335*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6336*67e74705SXin Li llvm::Type *Tys[2] = { VTy, PTy };
6337*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6338*67e74705SXin Li Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6339*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0],
6340*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6341*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6342*67e74705SXin Li }
6343*67e74705SXin Li case NEON::BI__builtin_neon_vld2_lane_v:
6344*67e74705SXin Li case NEON::BI__builtin_neon_vld2q_lane_v: {
6345*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6346*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6347*67e74705SXin Li Ops.push_back(Ops[1]);
6348*67e74705SXin Li Ops.erase(Ops.begin()+1);
6349*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6350*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6351*67e74705SXin Li Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6352*67e74705SXin Li Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6353*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6354*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6355*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6356*67e74705SXin Li }
6357*67e74705SXin Li case NEON::BI__builtin_neon_vld3_lane_v:
6358*67e74705SXin Li case NEON::BI__builtin_neon_vld3q_lane_v: {
6359*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6360*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6361*67e74705SXin Li Ops.push_back(Ops[1]);
6362*67e74705SXin Li Ops.erase(Ops.begin()+1);
6363*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6364*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6365*67e74705SXin Li Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6366*67e74705SXin Li Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6367*67e74705SXin Li Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6368*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6369*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6370*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6371*67e74705SXin Li }
6372*67e74705SXin Li case NEON::BI__builtin_neon_vld4_lane_v:
6373*67e74705SXin Li case NEON::BI__builtin_neon_vld4q_lane_v: {
6374*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6375*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6376*67e74705SXin Li Ops.push_back(Ops[1]);
6377*67e74705SXin Li Ops.erase(Ops.begin()+1);
6378*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6379*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6380*67e74705SXin Li Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6381*67e74705SXin Li Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6382*67e74705SXin Li Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6383*67e74705SXin Li Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6384*67e74705SXin Li Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6385*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6386*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6387*67e74705SXin Li }
6388*67e74705SXin Li case NEON::BI__builtin_neon_vst2_v:
6389*67e74705SXin Li case NEON::BI__builtin_neon_vst2q_v: {
6390*67e74705SXin Li Ops.push_back(Ops[0]);
6391*67e74705SXin Li Ops.erase(Ops.begin());
6392*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6393*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6394*67e74705SXin Li Ops, "");
6395*67e74705SXin Li }
6396*67e74705SXin Li case NEON::BI__builtin_neon_vst2_lane_v:
6397*67e74705SXin Li case NEON::BI__builtin_neon_vst2q_lane_v: {
6398*67e74705SXin Li Ops.push_back(Ops[0]);
6399*67e74705SXin Li Ops.erase(Ops.begin());
6400*67e74705SXin Li Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6401*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6402*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6403*67e74705SXin Li Ops, "");
6404*67e74705SXin Li }
6405*67e74705SXin Li case NEON::BI__builtin_neon_vst3_v:
6406*67e74705SXin Li case NEON::BI__builtin_neon_vst3q_v: {
6407*67e74705SXin Li Ops.push_back(Ops[0]);
6408*67e74705SXin Li Ops.erase(Ops.begin());
6409*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6410*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6411*67e74705SXin Li Ops, "");
6412*67e74705SXin Li }
6413*67e74705SXin Li case NEON::BI__builtin_neon_vst3_lane_v:
6414*67e74705SXin Li case NEON::BI__builtin_neon_vst3q_lane_v: {
6415*67e74705SXin Li Ops.push_back(Ops[0]);
6416*67e74705SXin Li Ops.erase(Ops.begin());
6417*67e74705SXin Li Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6418*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6419*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6420*67e74705SXin Li Ops, "");
6421*67e74705SXin Li }
6422*67e74705SXin Li case NEON::BI__builtin_neon_vst4_v:
6423*67e74705SXin Li case NEON::BI__builtin_neon_vst4q_v: {
6424*67e74705SXin Li Ops.push_back(Ops[0]);
6425*67e74705SXin Li Ops.erase(Ops.begin());
6426*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6427*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6428*67e74705SXin Li Ops, "");
6429*67e74705SXin Li }
6430*67e74705SXin Li case NEON::BI__builtin_neon_vst4_lane_v:
6431*67e74705SXin Li case NEON::BI__builtin_neon_vst4q_lane_v: {
6432*67e74705SXin Li Ops.push_back(Ops[0]);
6433*67e74705SXin Li Ops.erase(Ops.begin());
6434*67e74705SXin Li Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6435*67e74705SXin Li llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6436*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6437*67e74705SXin Li Ops, "");
6438*67e74705SXin Li }
6439*67e74705SXin Li case NEON::BI__builtin_neon_vtrn_v:
6440*67e74705SXin Li case NEON::BI__builtin_neon_vtrnq_v: {
6441*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6442*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6443*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6444*67e74705SXin Li Value *SV = nullptr;
6445*67e74705SXin Li
6446*67e74705SXin Li for (unsigned vi = 0; vi != 2; ++vi) {
6447*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
6448*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6449*67e74705SXin Li Indices.push_back(i+vi);
6450*67e74705SXin Li Indices.push_back(i+e+vi);
6451*67e74705SXin Li }
6452*67e74705SXin Li Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6453*67e74705SXin Li SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6454*67e74705SXin Li SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6455*67e74705SXin Li }
6456*67e74705SXin Li return SV;
6457*67e74705SXin Li }
6458*67e74705SXin Li case NEON::BI__builtin_neon_vuzp_v:
6459*67e74705SXin Li case NEON::BI__builtin_neon_vuzpq_v: {
6460*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6461*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6462*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6463*67e74705SXin Li Value *SV = nullptr;
6464*67e74705SXin Li
6465*67e74705SXin Li for (unsigned vi = 0; vi != 2; ++vi) {
6466*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
6467*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6468*67e74705SXin Li Indices.push_back(2*i+vi);
6469*67e74705SXin Li
6470*67e74705SXin Li Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6471*67e74705SXin Li SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6472*67e74705SXin Li SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6473*67e74705SXin Li }
6474*67e74705SXin Li return SV;
6475*67e74705SXin Li }
6476*67e74705SXin Li case NEON::BI__builtin_neon_vzip_v:
6477*67e74705SXin Li case NEON::BI__builtin_neon_vzipq_v: {
6478*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6479*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6480*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6481*67e74705SXin Li Value *SV = nullptr;
6482*67e74705SXin Li
6483*67e74705SXin Li for (unsigned vi = 0; vi != 2; ++vi) {
6484*67e74705SXin Li SmallVector<uint32_t, 16> Indices;
6485*67e74705SXin Li for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6486*67e74705SXin Li Indices.push_back((i + vi*e) >> 1);
6487*67e74705SXin Li Indices.push_back(((i + vi*e) >> 1)+e);
6488*67e74705SXin Li }
6489*67e74705SXin Li Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6490*67e74705SXin Li SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6491*67e74705SXin Li SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6492*67e74705SXin Li }
6493*67e74705SXin Li return SV;
6494*67e74705SXin Li }
6495*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl1q_v: {
6496*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6497*67e74705SXin Li Ops, "vtbl1");
6498*67e74705SXin Li }
6499*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl2q_v: {
6500*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6501*67e74705SXin Li Ops, "vtbl2");
6502*67e74705SXin Li }
6503*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl3q_v: {
6504*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6505*67e74705SXin Li Ops, "vtbl3");
6506*67e74705SXin Li }
6507*67e74705SXin Li case NEON::BI__builtin_neon_vqtbl4q_v: {
6508*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6509*67e74705SXin Li Ops, "vtbl4");
6510*67e74705SXin Li }
6511*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx1q_v: {
6512*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6513*67e74705SXin Li Ops, "vtbx1");
6514*67e74705SXin Li }
6515*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx2q_v: {
6516*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6517*67e74705SXin Li Ops, "vtbx2");
6518*67e74705SXin Li }
6519*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx3q_v: {
6520*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6521*67e74705SXin Li Ops, "vtbx3");
6522*67e74705SXin Li }
6523*67e74705SXin Li case NEON::BI__builtin_neon_vqtbx4q_v: {
6524*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6525*67e74705SXin Li Ops, "vtbx4");
6526*67e74705SXin Li }
6527*67e74705SXin Li case NEON::BI__builtin_neon_vsqadd_v:
6528*67e74705SXin Li case NEON::BI__builtin_neon_vsqaddq_v: {
6529*67e74705SXin Li Int = Intrinsic::aarch64_neon_usqadd;
6530*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6531*67e74705SXin Li }
6532*67e74705SXin Li case NEON::BI__builtin_neon_vuqadd_v:
6533*67e74705SXin Li case NEON::BI__builtin_neon_vuqaddq_v: {
6534*67e74705SXin Li Int = Intrinsic::aarch64_neon_suqadd;
6535*67e74705SXin Li return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6536*67e74705SXin Li }
6537*67e74705SXin Li }
6538*67e74705SXin Li }
6539*67e74705SXin Li
6540*67e74705SXin Li llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value * > Ops)6541*67e74705SXin Li BuildVector(ArrayRef<llvm::Value*> Ops) {
6542*67e74705SXin Li assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6543*67e74705SXin Li "Not a power-of-two sized vector!");
6544*67e74705SXin Li bool AllConstants = true;
6545*67e74705SXin Li for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6546*67e74705SXin Li AllConstants &= isa<Constant>(Ops[i]);
6547*67e74705SXin Li
6548*67e74705SXin Li // If this is a constant vector, create a ConstantVector.
6549*67e74705SXin Li if (AllConstants) {
6550*67e74705SXin Li SmallVector<llvm::Constant*, 16> CstOps;
6551*67e74705SXin Li for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6552*67e74705SXin Li CstOps.push_back(cast<Constant>(Ops[i]));
6553*67e74705SXin Li return llvm::ConstantVector::get(CstOps);
6554*67e74705SXin Li }
6555*67e74705SXin Li
6556*67e74705SXin Li // Otherwise, insertelement the values to build the vector.
6557*67e74705SXin Li Value *Result =
6558*67e74705SXin Li llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6559*67e74705SXin Li
6560*67e74705SXin Li for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6561*67e74705SXin Li Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6562*67e74705SXin Li
6563*67e74705SXin Li return Result;
6564*67e74705SXin Li }
6565*67e74705SXin Li
6566*67e74705SXin Li // Convert the mask from an integer type to a vector of i1.
getMaskVecValue(CodeGenFunction & CGF,Value * Mask,unsigned NumElts)6567*67e74705SXin Li static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6568*67e74705SXin Li unsigned NumElts) {
6569*67e74705SXin Li
6570*67e74705SXin Li llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6571*67e74705SXin Li cast<IntegerType>(Mask->getType())->getBitWidth());
6572*67e74705SXin Li Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6573*67e74705SXin Li
6574*67e74705SXin Li // If we have less than 8 elements, then the starting mask was an i8 and
6575*67e74705SXin Li // we need to extract down to the right number of elements.
6576*67e74705SXin Li if (NumElts < 8) {
6577*67e74705SXin Li uint32_t Indices[4];
6578*67e74705SXin Li for (unsigned i = 0; i != NumElts; ++i)
6579*67e74705SXin Li Indices[i] = i;
6580*67e74705SXin Li MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6581*67e74705SXin Li makeArrayRef(Indices, NumElts),
6582*67e74705SXin Li "extract");
6583*67e74705SXin Li }
6584*67e74705SXin Li return MaskVec;
6585*67e74705SXin Li }
6586*67e74705SXin Li
EmitX86MaskedStore(CodeGenFunction & CGF,SmallVectorImpl<Value * > & Ops,unsigned Align)6587*67e74705SXin Li static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6588*67e74705SXin Li SmallVectorImpl<Value *> &Ops,
6589*67e74705SXin Li unsigned Align) {
6590*67e74705SXin Li // Cast the pointer to right type.
6591*67e74705SXin Li Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6592*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6593*67e74705SXin Li
6594*67e74705SXin Li // If the mask is all ones just emit a regular store.
6595*67e74705SXin Li if (const auto *C = dyn_cast<Constant>(Ops[2]))
6596*67e74705SXin Li if (C->isAllOnesValue())
6597*67e74705SXin Li return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
6598*67e74705SXin Li
6599*67e74705SXin Li Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6600*67e74705SXin Li Ops[1]->getType()->getVectorNumElements());
6601*67e74705SXin Li
6602*67e74705SXin Li return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
6603*67e74705SXin Li }
6604*67e74705SXin Li
EmitX86MaskedLoad(CodeGenFunction & CGF,SmallVectorImpl<Value * > & Ops,unsigned Align)6605*67e74705SXin Li static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
6606*67e74705SXin Li SmallVectorImpl<Value *> &Ops, unsigned Align) {
6607*67e74705SXin Li // Cast the pointer to right type.
6608*67e74705SXin Li Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6609*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()));
6610*67e74705SXin Li
6611*67e74705SXin Li // If the mask is all ones just emit a regular store.
6612*67e74705SXin Li if (const auto *C = dyn_cast<Constant>(Ops[2]))
6613*67e74705SXin Li if (C->isAllOnesValue())
6614*67e74705SXin Li return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6615*67e74705SXin Li
6616*67e74705SXin Li Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6617*67e74705SXin Li Ops[1]->getType()->getVectorNumElements());
6618*67e74705SXin Li
6619*67e74705SXin Li return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
6620*67e74705SXin Li }
6621*67e74705SXin Li
EmitX86Select(CodeGenFunction & CGF,Value * Mask,Value * Op0,Value * Op1)6622*67e74705SXin Li static Value *EmitX86Select(CodeGenFunction &CGF,
6623*67e74705SXin Li Value *Mask, Value *Op0, Value *Op1) {
6624*67e74705SXin Li
6625*67e74705SXin Li // If the mask is all ones just return first argument.
6626*67e74705SXin Li if (const auto *C = dyn_cast<Constant>(Mask))
6627*67e74705SXin Li if (C->isAllOnesValue())
6628*67e74705SXin Li return Op0;
6629*67e74705SXin Li
6630*67e74705SXin Li Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
6631*67e74705SXin Li
6632*67e74705SXin Li return CGF.Builder.CreateSelect(Mask, Op0, Op1);
6633*67e74705SXin Li }
6634*67e74705SXin Li
EmitX86MaskedCompare(CodeGenFunction & CGF,unsigned CC,bool Signed,SmallVectorImpl<Value * > & Ops)6635*67e74705SXin Li static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
6636*67e74705SXin Li bool Signed, SmallVectorImpl<Value *> &Ops) {
6637*67e74705SXin Li unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
6638*67e74705SXin Li Value *Cmp;
6639*67e74705SXin Li
6640*67e74705SXin Li if (CC == 3) {
6641*67e74705SXin Li Cmp = Constant::getNullValue(
6642*67e74705SXin Li llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6643*67e74705SXin Li } else if (CC == 7) {
6644*67e74705SXin Li Cmp = Constant::getAllOnesValue(
6645*67e74705SXin Li llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6646*67e74705SXin Li } else {
6647*67e74705SXin Li ICmpInst::Predicate Pred;
6648*67e74705SXin Li switch (CC) {
6649*67e74705SXin Li default: llvm_unreachable("Unknown condition code");
6650*67e74705SXin Li case 0: Pred = ICmpInst::ICMP_EQ; break;
6651*67e74705SXin Li case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
6652*67e74705SXin Li case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
6653*67e74705SXin Li case 4: Pred = ICmpInst::ICMP_NE; break;
6654*67e74705SXin Li case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
6655*67e74705SXin Li case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
6656*67e74705SXin Li }
6657*67e74705SXin Li Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
6658*67e74705SXin Li }
6659*67e74705SXin Li
6660*67e74705SXin Li const auto *C = dyn_cast<Constant>(Ops.back());
6661*67e74705SXin Li if (!C || !C->isAllOnesValue())
6662*67e74705SXin Li Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
6663*67e74705SXin Li
6664*67e74705SXin Li if (NumElts < 8) {
6665*67e74705SXin Li uint32_t Indices[8];
6666*67e74705SXin Li for (unsigned i = 0; i != NumElts; ++i)
6667*67e74705SXin Li Indices[i] = i;
6668*67e74705SXin Li for (unsigned i = NumElts; i != 8; ++i)
6669*67e74705SXin Li Indices[i] = i % NumElts + NumElts;
6670*67e74705SXin Li Cmp = CGF.Builder.CreateShuffleVector(
6671*67e74705SXin Li Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
6672*67e74705SXin Li }
6673*67e74705SXin Li return CGF.Builder.CreateBitCast(Cmp,
6674*67e74705SXin Li IntegerType::get(CGF.getLLVMContext(),
6675*67e74705SXin Li std::max(NumElts, 8U)));
6676*67e74705SXin Li }
6677*67e74705SXin Li
EmitX86BuiltinExpr(unsigned BuiltinID,const CallExpr * E)6678*67e74705SXin Li Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6679*67e74705SXin Li const CallExpr *E) {
6680*67e74705SXin Li if (BuiltinID == X86::BI__builtin_ms_va_start ||
6681*67e74705SXin Li BuiltinID == X86::BI__builtin_ms_va_end)
6682*67e74705SXin Li return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6683*67e74705SXin Li BuiltinID == X86::BI__builtin_ms_va_start);
6684*67e74705SXin Li if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6685*67e74705SXin Li // Lower this manually. We can't reliably determine whether or not any
6686*67e74705SXin Li // given va_copy() is for a Win64 va_list from the calling convention
6687*67e74705SXin Li // alone, because it's legal to do this from a System V ABI function.
6688*67e74705SXin Li // With opaque pointer types, we won't have enough information in LLVM
6689*67e74705SXin Li // IR to determine this from the argument types, either. Best to do it
6690*67e74705SXin Li // now, while we have enough information.
6691*67e74705SXin Li Address DestAddr = EmitMSVAListRef(E->getArg(0));
6692*67e74705SXin Li Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6693*67e74705SXin Li
6694*67e74705SXin Li llvm::Type *BPP = Int8PtrPtrTy;
6695*67e74705SXin Li
6696*67e74705SXin Li DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6697*67e74705SXin Li DestAddr.getAlignment());
6698*67e74705SXin Li SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6699*67e74705SXin Li SrcAddr.getAlignment());
6700*67e74705SXin Li
6701*67e74705SXin Li Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6702*67e74705SXin Li return Builder.CreateStore(ArgPtr, DestAddr);
6703*67e74705SXin Li }
6704*67e74705SXin Li
6705*67e74705SXin Li SmallVector<Value*, 4> Ops;
6706*67e74705SXin Li
6707*67e74705SXin Li // Find out if any arguments are required to be integer constant expressions.
6708*67e74705SXin Li unsigned ICEArguments = 0;
6709*67e74705SXin Li ASTContext::GetBuiltinTypeError Error;
6710*67e74705SXin Li getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6711*67e74705SXin Li assert(Error == ASTContext::GE_None && "Should not codegen an error");
6712*67e74705SXin Li
6713*67e74705SXin Li for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6714*67e74705SXin Li // If this is a normal argument, just emit it as a scalar.
6715*67e74705SXin Li if ((ICEArguments & (1 << i)) == 0) {
6716*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(i)));
6717*67e74705SXin Li continue;
6718*67e74705SXin Li }
6719*67e74705SXin Li
6720*67e74705SXin Li // If this is required to be a constant, constant fold it so that we know
6721*67e74705SXin Li // that the generated intrinsic gets a ConstantInt.
6722*67e74705SXin Li llvm::APSInt Result;
6723*67e74705SXin Li bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6724*67e74705SXin Li assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6725*67e74705SXin Li Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6726*67e74705SXin Li }
6727*67e74705SXin Li
6728*67e74705SXin Li // These exist so that the builtin that takes an immediate can be bounds
6729*67e74705SXin Li // checked by clang to avoid passing bad immediates to the backend. Since
6730*67e74705SXin Li // AVX has a larger immediate than SSE we would need separate builtins to
6731*67e74705SXin Li // do the different bounds checking. Rather than create a clang specific
6732*67e74705SXin Li // SSE only builtin, this implements eight separate builtins to match gcc
6733*67e74705SXin Li // implementation.
6734*67e74705SXin Li auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
6735*67e74705SXin Li Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6736*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(ID);
6737*67e74705SXin Li return Builder.CreateCall(F, Ops);
6738*67e74705SXin Li };
6739*67e74705SXin Li
6740*67e74705SXin Li // For the vector forms of FP comparisons, translate the builtins directly to
6741*67e74705SXin Li // IR.
6742*67e74705SXin Li // TODO: The builtins could be removed if the SSE header files used vector
6743*67e74705SXin Li // extension comparisons directly (vector ordered/unordered may need
6744*67e74705SXin Li // additional support via __builtin_isnan()).
6745*67e74705SXin Li auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
6746*67e74705SXin Li Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
6747*67e74705SXin Li llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
6748*67e74705SXin Li llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
6749*67e74705SXin Li Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
6750*67e74705SXin Li return Builder.CreateBitCast(Sext, FPVecTy);
6751*67e74705SXin Li };
6752*67e74705SXin Li
6753*67e74705SXin Li switch (BuiltinID) {
6754*67e74705SXin Li default: return nullptr;
6755*67e74705SXin Li case X86::BI__builtin_cpu_supports: {
6756*67e74705SXin Li const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6757*67e74705SXin Li StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6758*67e74705SXin Li
6759*67e74705SXin Li // TODO: When/if this becomes more than x86 specific then use a TargetInfo
6760*67e74705SXin Li // based mapping.
6761*67e74705SXin Li // Processor features and mapping to processor feature value.
6762*67e74705SXin Li enum X86Features {
6763*67e74705SXin Li CMOV = 0,
6764*67e74705SXin Li MMX,
6765*67e74705SXin Li POPCNT,
6766*67e74705SXin Li SSE,
6767*67e74705SXin Li SSE2,
6768*67e74705SXin Li SSE3,
6769*67e74705SXin Li SSSE3,
6770*67e74705SXin Li SSE4_1,
6771*67e74705SXin Li SSE4_2,
6772*67e74705SXin Li AVX,
6773*67e74705SXin Li AVX2,
6774*67e74705SXin Li SSE4_A,
6775*67e74705SXin Li FMA4,
6776*67e74705SXin Li XOP,
6777*67e74705SXin Li FMA,
6778*67e74705SXin Li AVX512F,
6779*67e74705SXin Li BMI,
6780*67e74705SXin Li BMI2,
6781*67e74705SXin Li AES,
6782*67e74705SXin Li PCLMUL,
6783*67e74705SXin Li AVX512VL,
6784*67e74705SXin Li AVX512BW,
6785*67e74705SXin Li AVX512DQ,
6786*67e74705SXin Li AVX512CD,
6787*67e74705SXin Li AVX512ER,
6788*67e74705SXin Li AVX512PF,
6789*67e74705SXin Li AVX512VBMI,
6790*67e74705SXin Li AVX512IFMA,
6791*67e74705SXin Li MAX
6792*67e74705SXin Li };
6793*67e74705SXin Li
6794*67e74705SXin Li X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6795*67e74705SXin Li .Case("cmov", X86Features::CMOV)
6796*67e74705SXin Li .Case("mmx", X86Features::MMX)
6797*67e74705SXin Li .Case("popcnt", X86Features::POPCNT)
6798*67e74705SXin Li .Case("sse", X86Features::SSE)
6799*67e74705SXin Li .Case("sse2", X86Features::SSE2)
6800*67e74705SXin Li .Case("sse3", X86Features::SSE3)
6801*67e74705SXin Li .Case("ssse3", X86Features::SSSE3)
6802*67e74705SXin Li .Case("sse4.1", X86Features::SSE4_1)
6803*67e74705SXin Li .Case("sse4.2", X86Features::SSE4_2)
6804*67e74705SXin Li .Case("avx", X86Features::AVX)
6805*67e74705SXin Li .Case("avx2", X86Features::AVX2)
6806*67e74705SXin Li .Case("sse4a", X86Features::SSE4_A)
6807*67e74705SXin Li .Case("fma4", X86Features::FMA4)
6808*67e74705SXin Li .Case("xop", X86Features::XOP)
6809*67e74705SXin Li .Case("fma", X86Features::FMA)
6810*67e74705SXin Li .Case("avx512f", X86Features::AVX512F)
6811*67e74705SXin Li .Case("bmi", X86Features::BMI)
6812*67e74705SXin Li .Case("bmi2", X86Features::BMI2)
6813*67e74705SXin Li .Case("aes", X86Features::AES)
6814*67e74705SXin Li .Case("pclmul", X86Features::PCLMUL)
6815*67e74705SXin Li .Case("avx512vl", X86Features::AVX512VL)
6816*67e74705SXin Li .Case("avx512bw", X86Features::AVX512BW)
6817*67e74705SXin Li .Case("avx512dq", X86Features::AVX512DQ)
6818*67e74705SXin Li .Case("avx512cd", X86Features::AVX512CD)
6819*67e74705SXin Li .Case("avx512er", X86Features::AVX512ER)
6820*67e74705SXin Li .Case("avx512pf", X86Features::AVX512PF)
6821*67e74705SXin Li .Case("avx512vbmi", X86Features::AVX512VBMI)
6822*67e74705SXin Li .Case("avx512ifma", X86Features::AVX512IFMA)
6823*67e74705SXin Li .Default(X86Features::MAX);
6824*67e74705SXin Li assert(Feature != X86Features::MAX && "Invalid feature!");
6825*67e74705SXin Li
6826*67e74705SXin Li // Matching the struct layout from the compiler-rt/libgcc structure that is
6827*67e74705SXin Li // filled in:
6828*67e74705SXin Li // unsigned int __cpu_vendor;
6829*67e74705SXin Li // unsigned int __cpu_type;
6830*67e74705SXin Li // unsigned int __cpu_subtype;
6831*67e74705SXin Li // unsigned int __cpu_features[1];
6832*67e74705SXin Li llvm::Type *STy = llvm::StructType::get(
6833*67e74705SXin Li Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6834*67e74705SXin Li
6835*67e74705SXin Li // Grab the global __cpu_model.
6836*67e74705SXin Li llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6837*67e74705SXin Li
6838*67e74705SXin Li // Grab the first (0th) element from the field __cpu_features off of the
6839*67e74705SXin Li // global in the struct STy.
6840*67e74705SXin Li Value *Idxs[] = {
6841*67e74705SXin Li ConstantInt::get(Int32Ty, 0),
6842*67e74705SXin Li ConstantInt::get(Int32Ty, 3),
6843*67e74705SXin Li ConstantInt::get(Int32Ty, 0)
6844*67e74705SXin Li };
6845*67e74705SXin Li Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6846*67e74705SXin Li Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6847*67e74705SXin Li CharUnits::fromQuantity(4));
6848*67e74705SXin Li
6849*67e74705SXin Li // Check the value of the bit corresponding to the feature requested.
6850*67e74705SXin Li Value *Bitset = Builder.CreateAnd(
6851*67e74705SXin Li Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
6852*67e74705SXin Li return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6853*67e74705SXin Li }
6854*67e74705SXin Li case X86::BI_mm_prefetch: {
6855*67e74705SXin Li Value *Address = Ops[0];
6856*67e74705SXin Li Value *RW = ConstantInt::get(Int32Ty, 0);
6857*67e74705SXin Li Value *Locality = Ops[1];
6858*67e74705SXin Li Value *Data = ConstantInt::get(Int32Ty, 1);
6859*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6860*67e74705SXin Li return Builder.CreateCall(F, {Address, RW, Locality, Data});
6861*67e74705SXin Li }
6862*67e74705SXin Li case X86::BI__builtin_ia32_undef128:
6863*67e74705SXin Li case X86::BI__builtin_ia32_undef256:
6864*67e74705SXin Li case X86::BI__builtin_ia32_undef512:
6865*67e74705SXin Li return UndefValue::get(ConvertType(E->getType()));
6866*67e74705SXin Li case X86::BI__builtin_ia32_vec_init_v8qi:
6867*67e74705SXin Li case X86::BI__builtin_ia32_vec_init_v4hi:
6868*67e74705SXin Li case X86::BI__builtin_ia32_vec_init_v2si:
6869*67e74705SXin Li return Builder.CreateBitCast(BuildVector(Ops),
6870*67e74705SXin Li llvm::Type::getX86_MMXTy(getLLVMContext()));
6871*67e74705SXin Li case X86::BI__builtin_ia32_vec_ext_v2si:
6872*67e74705SXin Li return Builder.CreateExtractElement(Ops[0],
6873*67e74705SXin Li llvm::ConstantInt::get(Ops[1]->getType(), 0));
6874*67e74705SXin Li case X86::BI__builtin_ia32_ldmxcsr: {
6875*67e74705SXin Li Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6876*67e74705SXin Li Builder.CreateStore(Ops[0], Tmp);
6877*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6878*67e74705SXin Li Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6879*67e74705SXin Li }
6880*67e74705SXin Li case X86::BI__builtin_ia32_stmxcsr: {
6881*67e74705SXin Li Address Tmp = CreateMemTemp(E->getType());
6882*67e74705SXin Li Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6883*67e74705SXin Li Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6884*67e74705SXin Li return Builder.CreateLoad(Tmp, "stmxcsr");
6885*67e74705SXin Li }
6886*67e74705SXin Li case X86::BI__builtin_ia32_xsave:
6887*67e74705SXin Li case X86::BI__builtin_ia32_xsave64:
6888*67e74705SXin Li case X86::BI__builtin_ia32_xrstor:
6889*67e74705SXin Li case X86::BI__builtin_ia32_xrstor64:
6890*67e74705SXin Li case X86::BI__builtin_ia32_xsaveopt:
6891*67e74705SXin Li case X86::BI__builtin_ia32_xsaveopt64:
6892*67e74705SXin Li case X86::BI__builtin_ia32_xrstors:
6893*67e74705SXin Li case X86::BI__builtin_ia32_xrstors64:
6894*67e74705SXin Li case X86::BI__builtin_ia32_xsavec:
6895*67e74705SXin Li case X86::BI__builtin_ia32_xsavec64:
6896*67e74705SXin Li case X86::BI__builtin_ia32_xsaves:
6897*67e74705SXin Li case X86::BI__builtin_ia32_xsaves64: {
6898*67e74705SXin Li Intrinsic::ID ID;
6899*67e74705SXin Li #define INTRINSIC_X86_XSAVE_ID(NAME) \
6900*67e74705SXin Li case X86::BI__builtin_ia32_##NAME: \
6901*67e74705SXin Li ID = Intrinsic::x86_##NAME; \
6902*67e74705SXin Li break
6903*67e74705SXin Li switch (BuiltinID) {
6904*67e74705SXin Li default: llvm_unreachable("Unsupported intrinsic!");
6905*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsave);
6906*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsave64);
6907*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xrstor);
6908*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xrstor64);
6909*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsaveopt);
6910*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6911*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xrstors);
6912*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xrstors64);
6913*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsavec);
6914*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsavec64);
6915*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsaves);
6916*67e74705SXin Li INTRINSIC_X86_XSAVE_ID(xsaves64);
6917*67e74705SXin Li }
6918*67e74705SXin Li #undef INTRINSIC_X86_XSAVE_ID
6919*67e74705SXin Li Value *Mhi = Builder.CreateTrunc(
6920*67e74705SXin Li Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6921*67e74705SXin Li Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6922*67e74705SXin Li Ops[1] = Mhi;
6923*67e74705SXin Li Ops.push_back(Mlo);
6924*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6925*67e74705SXin Li }
6926*67e74705SXin Li case X86::BI__builtin_ia32_storedqudi128_mask:
6927*67e74705SXin Li case X86::BI__builtin_ia32_storedqusi128_mask:
6928*67e74705SXin Li case X86::BI__builtin_ia32_storedquhi128_mask:
6929*67e74705SXin Li case X86::BI__builtin_ia32_storedquqi128_mask:
6930*67e74705SXin Li case X86::BI__builtin_ia32_storeupd128_mask:
6931*67e74705SXin Li case X86::BI__builtin_ia32_storeups128_mask:
6932*67e74705SXin Li case X86::BI__builtin_ia32_storedqudi256_mask:
6933*67e74705SXin Li case X86::BI__builtin_ia32_storedqusi256_mask:
6934*67e74705SXin Li case X86::BI__builtin_ia32_storedquhi256_mask:
6935*67e74705SXin Li case X86::BI__builtin_ia32_storedquqi256_mask:
6936*67e74705SXin Li case X86::BI__builtin_ia32_storeupd256_mask:
6937*67e74705SXin Li case X86::BI__builtin_ia32_storeups256_mask:
6938*67e74705SXin Li case X86::BI__builtin_ia32_storedqudi512_mask:
6939*67e74705SXin Li case X86::BI__builtin_ia32_storedqusi512_mask:
6940*67e74705SXin Li case X86::BI__builtin_ia32_storedquhi512_mask:
6941*67e74705SXin Li case X86::BI__builtin_ia32_storedquqi512_mask:
6942*67e74705SXin Li case X86::BI__builtin_ia32_storeupd512_mask:
6943*67e74705SXin Li case X86::BI__builtin_ia32_storeups512_mask:
6944*67e74705SXin Li return EmitX86MaskedStore(*this, Ops, 1);
6945*67e74705SXin Li
6946*67e74705SXin Li case X86::BI__builtin_ia32_movdqa32store128_mask:
6947*67e74705SXin Li case X86::BI__builtin_ia32_movdqa64store128_mask:
6948*67e74705SXin Li case X86::BI__builtin_ia32_storeaps128_mask:
6949*67e74705SXin Li case X86::BI__builtin_ia32_storeapd128_mask:
6950*67e74705SXin Li case X86::BI__builtin_ia32_movdqa32store256_mask:
6951*67e74705SXin Li case X86::BI__builtin_ia32_movdqa64store256_mask:
6952*67e74705SXin Li case X86::BI__builtin_ia32_storeaps256_mask:
6953*67e74705SXin Li case X86::BI__builtin_ia32_storeapd256_mask:
6954*67e74705SXin Li case X86::BI__builtin_ia32_movdqa32store512_mask:
6955*67e74705SXin Li case X86::BI__builtin_ia32_movdqa64store512_mask:
6956*67e74705SXin Li case X86::BI__builtin_ia32_storeaps512_mask:
6957*67e74705SXin Li case X86::BI__builtin_ia32_storeapd512_mask: {
6958*67e74705SXin Li unsigned Align =
6959*67e74705SXin Li getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6960*67e74705SXin Li return EmitX86MaskedStore(*this, Ops, Align);
6961*67e74705SXin Li }
6962*67e74705SXin Li case X86::BI__builtin_ia32_loadups128_mask:
6963*67e74705SXin Li case X86::BI__builtin_ia32_loadups256_mask:
6964*67e74705SXin Li case X86::BI__builtin_ia32_loadups512_mask:
6965*67e74705SXin Li case X86::BI__builtin_ia32_loadupd128_mask:
6966*67e74705SXin Li case X86::BI__builtin_ia32_loadupd256_mask:
6967*67e74705SXin Li case X86::BI__builtin_ia32_loadupd512_mask:
6968*67e74705SXin Li case X86::BI__builtin_ia32_loaddquqi128_mask:
6969*67e74705SXin Li case X86::BI__builtin_ia32_loaddquqi256_mask:
6970*67e74705SXin Li case X86::BI__builtin_ia32_loaddquqi512_mask:
6971*67e74705SXin Li case X86::BI__builtin_ia32_loaddquhi128_mask:
6972*67e74705SXin Li case X86::BI__builtin_ia32_loaddquhi256_mask:
6973*67e74705SXin Li case X86::BI__builtin_ia32_loaddquhi512_mask:
6974*67e74705SXin Li case X86::BI__builtin_ia32_loaddqusi128_mask:
6975*67e74705SXin Li case X86::BI__builtin_ia32_loaddqusi256_mask:
6976*67e74705SXin Li case X86::BI__builtin_ia32_loaddqusi512_mask:
6977*67e74705SXin Li case X86::BI__builtin_ia32_loaddqudi128_mask:
6978*67e74705SXin Li case X86::BI__builtin_ia32_loaddqudi256_mask:
6979*67e74705SXin Li case X86::BI__builtin_ia32_loaddqudi512_mask:
6980*67e74705SXin Li return EmitX86MaskedLoad(*this, Ops, 1);
6981*67e74705SXin Li
6982*67e74705SXin Li case X86::BI__builtin_ia32_loadaps128_mask:
6983*67e74705SXin Li case X86::BI__builtin_ia32_loadaps256_mask:
6984*67e74705SXin Li case X86::BI__builtin_ia32_loadaps512_mask:
6985*67e74705SXin Li case X86::BI__builtin_ia32_loadapd128_mask:
6986*67e74705SXin Li case X86::BI__builtin_ia32_loadapd256_mask:
6987*67e74705SXin Li case X86::BI__builtin_ia32_loadapd512_mask:
6988*67e74705SXin Li case X86::BI__builtin_ia32_movdqa32load128_mask:
6989*67e74705SXin Li case X86::BI__builtin_ia32_movdqa32load256_mask:
6990*67e74705SXin Li case X86::BI__builtin_ia32_movdqa32load512_mask:
6991*67e74705SXin Li case X86::BI__builtin_ia32_movdqa64load128_mask:
6992*67e74705SXin Li case X86::BI__builtin_ia32_movdqa64load256_mask:
6993*67e74705SXin Li case X86::BI__builtin_ia32_movdqa64load512_mask: {
6994*67e74705SXin Li unsigned Align =
6995*67e74705SXin Li getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6996*67e74705SXin Li return EmitX86MaskedLoad(*this, Ops, Align);
6997*67e74705SXin Li }
6998*67e74705SXin Li case X86::BI__builtin_ia32_storehps:
6999*67e74705SXin Li case X86::BI__builtin_ia32_storelps: {
7000*67e74705SXin Li llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7001*67e74705SXin Li llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7002*67e74705SXin Li
7003*67e74705SXin Li // cast val v2i64
7004*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7005*67e74705SXin Li
7006*67e74705SXin Li // extract (0, 1)
7007*67e74705SXin Li unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7008*67e74705SXin Li llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7009*67e74705SXin Li Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7010*67e74705SXin Li
7011*67e74705SXin Li // cast pointer to i64 & store
7012*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7013*67e74705SXin Li return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7014*67e74705SXin Li }
7015*67e74705SXin Li case X86::BI__builtin_ia32_palignr128:
7016*67e74705SXin Li case X86::BI__builtin_ia32_palignr256:
7017*67e74705SXin Li case X86::BI__builtin_ia32_palignr128_mask:
7018*67e74705SXin Li case X86::BI__builtin_ia32_palignr256_mask:
7019*67e74705SXin Li case X86::BI__builtin_ia32_palignr512_mask: {
7020*67e74705SXin Li unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7021*67e74705SXin Li
7022*67e74705SXin Li unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7023*67e74705SXin Li assert(NumElts % 16 == 0);
7024*67e74705SXin Li
7025*67e74705SXin Li // If palignr is shifting the pair of vectors more than the size of two
7026*67e74705SXin Li // lanes, emit zero.
7027*67e74705SXin Li if (ShiftVal >= 32)
7028*67e74705SXin Li return llvm::Constant::getNullValue(ConvertType(E->getType()));
7029*67e74705SXin Li
7030*67e74705SXin Li // If palignr is shifting the pair of input vectors more than one lane,
7031*67e74705SXin Li // but less than two lanes, convert to shifting in zeroes.
7032*67e74705SXin Li if (ShiftVal > 16) {
7033*67e74705SXin Li ShiftVal -= 16;
7034*67e74705SXin Li Ops[1] = Ops[0];
7035*67e74705SXin Li Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7036*67e74705SXin Li }
7037*67e74705SXin Li
7038*67e74705SXin Li uint32_t Indices[64];
7039*67e74705SXin Li // 256-bit palignr operates on 128-bit lanes so we need to handle that
7040*67e74705SXin Li for (unsigned l = 0; l != NumElts; l += 16) {
7041*67e74705SXin Li for (unsigned i = 0; i != 16; ++i) {
7042*67e74705SXin Li unsigned Idx = ShiftVal + i;
7043*67e74705SXin Li if (Idx >= 16)
7044*67e74705SXin Li Idx += NumElts - 16; // End of lane, switch operand.
7045*67e74705SXin Li Indices[l + i] = Idx + l;
7046*67e74705SXin Li }
7047*67e74705SXin Li }
7048*67e74705SXin Li
7049*67e74705SXin Li Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7050*67e74705SXin Li makeArrayRef(Indices, NumElts),
7051*67e74705SXin Li "palignr");
7052*67e74705SXin Li
7053*67e74705SXin Li // If this isn't a masked builtin, just return the align operation.
7054*67e74705SXin Li if (Ops.size() == 3)
7055*67e74705SXin Li return Align;
7056*67e74705SXin Li
7057*67e74705SXin Li return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7058*67e74705SXin Li }
7059*67e74705SXin Li
7060*67e74705SXin Li case X86::BI__builtin_ia32_movnti:
7061*67e74705SXin Li case X86::BI__builtin_ia32_movnti64: {
7062*67e74705SXin Li llvm::MDNode *Node = llvm::MDNode::get(
7063*67e74705SXin Li getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7064*67e74705SXin Li
7065*67e74705SXin Li // Convert the type of the pointer to a pointer to the stored type.
7066*67e74705SXin Li Value *BC = Builder.CreateBitCast(Ops[0],
7067*67e74705SXin Li llvm::PointerType::getUnqual(Ops[1]->getType()),
7068*67e74705SXin Li "cast");
7069*67e74705SXin Li StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
7070*67e74705SXin Li SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7071*67e74705SXin Li
7072*67e74705SXin Li // No alignment for scalar intrinsic store.
7073*67e74705SXin Li SI->setAlignment(1);
7074*67e74705SXin Li return SI;
7075*67e74705SXin Li }
7076*67e74705SXin Li case X86::BI__builtin_ia32_movntsd:
7077*67e74705SXin Li case X86::BI__builtin_ia32_movntss: {
7078*67e74705SXin Li llvm::MDNode *Node = llvm::MDNode::get(
7079*67e74705SXin Li getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7080*67e74705SXin Li
7081*67e74705SXin Li // Extract the 0'th element of the source vector.
7082*67e74705SXin Li Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
7083*67e74705SXin Li
7084*67e74705SXin Li // Convert the type of the pointer to a pointer to the stored type.
7085*67e74705SXin Li Value *BC = Builder.CreateBitCast(Ops[0],
7086*67e74705SXin Li llvm::PointerType::getUnqual(Scl->getType()),
7087*67e74705SXin Li "cast");
7088*67e74705SXin Li
7089*67e74705SXin Li // Unaligned nontemporal store of the scalar value.
7090*67e74705SXin Li StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
7091*67e74705SXin Li SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7092*67e74705SXin Li SI->setAlignment(1);
7093*67e74705SXin Li return SI;
7094*67e74705SXin Li }
7095*67e74705SXin Li
7096*67e74705SXin Li case X86::BI__builtin_ia32_selectb_128:
7097*67e74705SXin Li case X86::BI__builtin_ia32_selectb_256:
7098*67e74705SXin Li case X86::BI__builtin_ia32_selectb_512:
7099*67e74705SXin Li case X86::BI__builtin_ia32_selectw_128:
7100*67e74705SXin Li case X86::BI__builtin_ia32_selectw_256:
7101*67e74705SXin Li case X86::BI__builtin_ia32_selectw_512:
7102*67e74705SXin Li case X86::BI__builtin_ia32_selectd_128:
7103*67e74705SXin Li case X86::BI__builtin_ia32_selectd_256:
7104*67e74705SXin Li case X86::BI__builtin_ia32_selectd_512:
7105*67e74705SXin Li case X86::BI__builtin_ia32_selectq_128:
7106*67e74705SXin Li case X86::BI__builtin_ia32_selectq_256:
7107*67e74705SXin Li case X86::BI__builtin_ia32_selectq_512:
7108*67e74705SXin Li case X86::BI__builtin_ia32_selectps_128:
7109*67e74705SXin Li case X86::BI__builtin_ia32_selectps_256:
7110*67e74705SXin Li case X86::BI__builtin_ia32_selectps_512:
7111*67e74705SXin Li case X86::BI__builtin_ia32_selectpd_128:
7112*67e74705SXin Li case X86::BI__builtin_ia32_selectpd_256:
7113*67e74705SXin Li case X86::BI__builtin_ia32_selectpd_512:
7114*67e74705SXin Li return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7115*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqb128_mask:
7116*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqb256_mask:
7117*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqb512_mask:
7118*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqw128_mask:
7119*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqw256_mask:
7120*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqw512_mask:
7121*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqd128_mask:
7122*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqd256_mask:
7123*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqd512_mask:
7124*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqq128_mask:
7125*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqq256_mask:
7126*67e74705SXin Li case X86::BI__builtin_ia32_pcmpeqq512_mask:
7127*67e74705SXin Li return EmitX86MaskedCompare(*this, 0, false, Ops);
7128*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtb128_mask:
7129*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtb256_mask:
7130*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtb512_mask:
7131*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtw128_mask:
7132*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtw256_mask:
7133*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtw512_mask:
7134*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtd128_mask:
7135*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtd256_mask:
7136*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtd512_mask:
7137*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtq128_mask:
7138*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtq256_mask:
7139*67e74705SXin Li case X86::BI__builtin_ia32_pcmpgtq512_mask:
7140*67e74705SXin Li return EmitX86MaskedCompare(*this, 6, true, Ops);
7141*67e74705SXin Li case X86::BI__builtin_ia32_cmpb128_mask:
7142*67e74705SXin Li case X86::BI__builtin_ia32_cmpb256_mask:
7143*67e74705SXin Li case X86::BI__builtin_ia32_cmpb512_mask:
7144*67e74705SXin Li case X86::BI__builtin_ia32_cmpw128_mask:
7145*67e74705SXin Li case X86::BI__builtin_ia32_cmpw256_mask:
7146*67e74705SXin Li case X86::BI__builtin_ia32_cmpw512_mask:
7147*67e74705SXin Li case X86::BI__builtin_ia32_cmpd128_mask:
7148*67e74705SXin Li case X86::BI__builtin_ia32_cmpd256_mask:
7149*67e74705SXin Li case X86::BI__builtin_ia32_cmpd512_mask:
7150*67e74705SXin Li case X86::BI__builtin_ia32_cmpq128_mask:
7151*67e74705SXin Li case X86::BI__builtin_ia32_cmpq256_mask:
7152*67e74705SXin Li case X86::BI__builtin_ia32_cmpq512_mask: {
7153*67e74705SXin Li unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7154*67e74705SXin Li return EmitX86MaskedCompare(*this, CC, true, Ops);
7155*67e74705SXin Li }
7156*67e74705SXin Li case X86::BI__builtin_ia32_ucmpb128_mask:
7157*67e74705SXin Li case X86::BI__builtin_ia32_ucmpb256_mask:
7158*67e74705SXin Li case X86::BI__builtin_ia32_ucmpb512_mask:
7159*67e74705SXin Li case X86::BI__builtin_ia32_ucmpw128_mask:
7160*67e74705SXin Li case X86::BI__builtin_ia32_ucmpw256_mask:
7161*67e74705SXin Li case X86::BI__builtin_ia32_ucmpw512_mask:
7162*67e74705SXin Li case X86::BI__builtin_ia32_ucmpd128_mask:
7163*67e74705SXin Li case X86::BI__builtin_ia32_ucmpd256_mask:
7164*67e74705SXin Li case X86::BI__builtin_ia32_ucmpd512_mask:
7165*67e74705SXin Li case X86::BI__builtin_ia32_ucmpq128_mask:
7166*67e74705SXin Li case X86::BI__builtin_ia32_ucmpq256_mask:
7167*67e74705SXin Li case X86::BI__builtin_ia32_ucmpq512_mask: {
7168*67e74705SXin Li unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7169*67e74705SXin Li return EmitX86MaskedCompare(*this, CC, false, Ops);
7170*67e74705SXin Li }
7171*67e74705SXin Li
7172*67e74705SXin Li case X86::BI__builtin_ia32_vplzcntd_128_mask:
7173*67e74705SXin Li case X86::BI__builtin_ia32_vplzcntd_256_mask:
7174*67e74705SXin Li case X86::BI__builtin_ia32_vplzcntd_512_mask:
7175*67e74705SXin Li case X86::BI__builtin_ia32_vplzcntq_128_mask:
7176*67e74705SXin Li case X86::BI__builtin_ia32_vplzcntq_256_mask:
7177*67e74705SXin Li case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7178*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7179*67e74705SXin Li return EmitX86Select(*this, Ops[2],
7180*67e74705SXin Li Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7181*67e74705SXin Li Ops[1]);
7182*67e74705SXin Li }
7183*67e74705SXin Li
7184*67e74705SXin Li // TODO: Handle 64/512-bit vector widths of min/max.
7185*67e74705SXin Li case X86::BI__builtin_ia32_pmaxsb128:
7186*67e74705SXin Li case X86::BI__builtin_ia32_pmaxsw128:
7187*67e74705SXin Li case X86::BI__builtin_ia32_pmaxsd128:
7188*67e74705SXin Li case X86::BI__builtin_ia32_pmaxsb256:
7189*67e74705SXin Li case X86::BI__builtin_ia32_pmaxsw256:
7190*67e74705SXin Li case X86::BI__builtin_ia32_pmaxsd256: {
7191*67e74705SXin Li Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
7192*67e74705SXin Li return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7193*67e74705SXin Li }
7194*67e74705SXin Li case X86::BI__builtin_ia32_pmaxub128:
7195*67e74705SXin Li case X86::BI__builtin_ia32_pmaxuw128:
7196*67e74705SXin Li case X86::BI__builtin_ia32_pmaxud128:
7197*67e74705SXin Li case X86::BI__builtin_ia32_pmaxub256:
7198*67e74705SXin Li case X86::BI__builtin_ia32_pmaxuw256:
7199*67e74705SXin Li case X86::BI__builtin_ia32_pmaxud256: {
7200*67e74705SXin Li Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
7201*67e74705SXin Li return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7202*67e74705SXin Li }
7203*67e74705SXin Li case X86::BI__builtin_ia32_pminsb128:
7204*67e74705SXin Li case X86::BI__builtin_ia32_pminsw128:
7205*67e74705SXin Li case X86::BI__builtin_ia32_pminsd128:
7206*67e74705SXin Li case X86::BI__builtin_ia32_pminsb256:
7207*67e74705SXin Li case X86::BI__builtin_ia32_pminsw256:
7208*67e74705SXin Li case X86::BI__builtin_ia32_pminsd256: {
7209*67e74705SXin Li Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
7210*67e74705SXin Li return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7211*67e74705SXin Li }
7212*67e74705SXin Li case X86::BI__builtin_ia32_pminub128:
7213*67e74705SXin Li case X86::BI__builtin_ia32_pminuw128:
7214*67e74705SXin Li case X86::BI__builtin_ia32_pminud128:
7215*67e74705SXin Li case X86::BI__builtin_ia32_pminub256:
7216*67e74705SXin Li case X86::BI__builtin_ia32_pminuw256:
7217*67e74705SXin Li case X86::BI__builtin_ia32_pminud256: {
7218*67e74705SXin Li Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
7219*67e74705SXin Li return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7220*67e74705SXin Li }
7221*67e74705SXin Li
7222*67e74705SXin Li // 3DNow!
7223*67e74705SXin Li case X86::BI__builtin_ia32_pswapdsf:
7224*67e74705SXin Li case X86::BI__builtin_ia32_pswapdsi: {
7225*67e74705SXin Li llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7226*67e74705SXin Li Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7227*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7228*67e74705SXin Li return Builder.CreateCall(F, Ops, "pswapd");
7229*67e74705SXin Li }
7230*67e74705SXin Li case X86::BI__builtin_ia32_rdrand16_step:
7231*67e74705SXin Li case X86::BI__builtin_ia32_rdrand32_step:
7232*67e74705SXin Li case X86::BI__builtin_ia32_rdrand64_step:
7233*67e74705SXin Li case X86::BI__builtin_ia32_rdseed16_step:
7234*67e74705SXin Li case X86::BI__builtin_ia32_rdseed32_step:
7235*67e74705SXin Li case X86::BI__builtin_ia32_rdseed64_step: {
7236*67e74705SXin Li Intrinsic::ID ID;
7237*67e74705SXin Li switch (BuiltinID) {
7238*67e74705SXin Li default: llvm_unreachable("Unsupported intrinsic!");
7239*67e74705SXin Li case X86::BI__builtin_ia32_rdrand16_step:
7240*67e74705SXin Li ID = Intrinsic::x86_rdrand_16;
7241*67e74705SXin Li break;
7242*67e74705SXin Li case X86::BI__builtin_ia32_rdrand32_step:
7243*67e74705SXin Li ID = Intrinsic::x86_rdrand_32;
7244*67e74705SXin Li break;
7245*67e74705SXin Li case X86::BI__builtin_ia32_rdrand64_step:
7246*67e74705SXin Li ID = Intrinsic::x86_rdrand_64;
7247*67e74705SXin Li break;
7248*67e74705SXin Li case X86::BI__builtin_ia32_rdseed16_step:
7249*67e74705SXin Li ID = Intrinsic::x86_rdseed_16;
7250*67e74705SXin Li break;
7251*67e74705SXin Li case X86::BI__builtin_ia32_rdseed32_step:
7252*67e74705SXin Li ID = Intrinsic::x86_rdseed_32;
7253*67e74705SXin Li break;
7254*67e74705SXin Li case X86::BI__builtin_ia32_rdseed64_step:
7255*67e74705SXin Li ID = Intrinsic::x86_rdseed_64;
7256*67e74705SXin Li break;
7257*67e74705SXin Li }
7258*67e74705SXin Li
7259*67e74705SXin Li Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7260*67e74705SXin Li Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7261*67e74705SXin Li Ops[0]);
7262*67e74705SXin Li return Builder.CreateExtractValue(Call, 1);
7263*67e74705SXin Li }
7264*67e74705SXin Li
7265*67e74705SXin Li // SSE packed comparison intrinsics
7266*67e74705SXin Li case X86::BI__builtin_ia32_cmpeqps:
7267*67e74705SXin Li case X86::BI__builtin_ia32_cmpeqpd:
7268*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7269*67e74705SXin Li case X86::BI__builtin_ia32_cmpltps:
7270*67e74705SXin Li case X86::BI__builtin_ia32_cmpltpd:
7271*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_OLT);
7272*67e74705SXin Li case X86::BI__builtin_ia32_cmpleps:
7273*67e74705SXin Li case X86::BI__builtin_ia32_cmplepd:
7274*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_OLE);
7275*67e74705SXin Li case X86::BI__builtin_ia32_cmpunordps:
7276*67e74705SXin Li case X86::BI__builtin_ia32_cmpunordpd:
7277*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_UNO);
7278*67e74705SXin Li case X86::BI__builtin_ia32_cmpneqps:
7279*67e74705SXin Li case X86::BI__builtin_ia32_cmpneqpd:
7280*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_UNE);
7281*67e74705SXin Li case X86::BI__builtin_ia32_cmpnltps:
7282*67e74705SXin Li case X86::BI__builtin_ia32_cmpnltpd:
7283*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_UGE);
7284*67e74705SXin Li case X86::BI__builtin_ia32_cmpnleps:
7285*67e74705SXin Li case X86::BI__builtin_ia32_cmpnlepd:
7286*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_UGT);
7287*67e74705SXin Li case X86::BI__builtin_ia32_cmpordps:
7288*67e74705SXin Li case X86::BI__builtin_ia32_cmpordpd:
7289*67e74705SXin Li return getVectorFCmpIR(CmpInst::FCMP_ORD);
7290*67e74705SXin Li case X86::BI__builtin_ia32_cmpps:
7291*67e74705SXin Li case X86::BI__builtin_ia32_cmpps256:
7292*67e74705SXin Li case X86::BI__builtin_ia32_cmppd:
7293*67e74705SXin Li case X86::BI__builtin_ia32_cmppd256: {
7294*67e74705SXin Li unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7295*67e74705SXin Li // If this one of the SSE immediates, we can use native IR.
7296*67e74705SXin Li if (CC < 8) {
7297*67e74705SXin Li FCmpInst::Predicate Pred;
7298*67e74705SXin Li switch (CC) {
7299*67e74705SXin Li case 0: Pred = FCmpInst::FCMP_OEQ; break;
7300*67e74705SXin Li case 1: Pred = FCmpInst::FCMP_OLT; break;
7301*67e74705SXin Li case 2: Pred = FCmpInst::FCMP_OLE; break;
7302*67e74705SXin Li case 3: Pred = FCmpInst::FCMP_UNO; break;
7303*67e74705SXin Li case 4: Pred = FCmpInst::FCMP_UNE; break;
7304*67e74705SXin Li case 5: Pred = FCmpInst::FCMP_UGE; break;
7305*67e74705SXin Li case 6: Pred = FCmpInst::FCMP_UGT; break;
7306*67e74705SXin Li case 7: Pred = FCmpInst::FCMP_ORD; break;
7307*67e74705SXin Li }
7308*67e74705SXin Li return getVectorFCmpIR(Pred);
7309*67e74705SXin Li }
7310*67e74705SXin Li
7311*67e74705SXin Li // We can't handle 8-31 immediates with native IR, use the intrinsic.
7312*67e74705SXin Li Intrinsic::ID ID;
7313*67e74705SXin Li switch (BuiltinID) {
7314*67e74705SXin Li default: llvm_unreachable("Unsupported intrinsic!");
7315*67e74705SXin Li case X86::BI__builtin_ia32_cmpps:
7316*67e74705SXin Li ID = Intrinsic::x86_sse_cmp_ps;
7317*67e74705SXin Li break;
7318*67e74705SXin Li case X86::BI__builtin_ia32_cmpps256:
7319*67e74705SXin Li ID = Intrinsic::x86_avx_cmp_ps_256;
7320*67e74705SXin Li break;
7321*67e74705SXin Li case X86::BI__builtin_ia32_cmppd:
7322*67e74705SXin Li ID = Intrinsic::x86_sse2_cmp_pd;
7323*67e74705SXin Li break;
7324*67e74705SXin Li case X86::BI__builtin_ia32_cmppd256:
7325*67e74705SXin Li ID = Intrinsic::x86_avx_cmp_pd_256;
7326*67e74705SXin Li break;
7327*67e74705SXin Li }
7328*67e74705SXin Li
7329*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7330*67e74705SXin Li }
7331*67e74705SXin Li
7332*67e74705SXin Li // SSE scalar comparison intrinsics
7333*67e74705SXin Li case X86::BI__builtin_ia32_cmpeqss:
7334*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7335*67e74705SXin Li case X86::BI__builtin_ia32_cmpltss:
7336*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7337*67e74705SXin Li case X86::BI__builtin_ia32_cmpless:
7338*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7339*67e74705SXin Li case X86::BI__builtin_ia32_cmpunordss:
7340*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7341*67e74705SXin Li case X86::BI__builtin_ia32_cmpneqss:
7342*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7343*67e74705SXin Li case X86::BI__builtin_ia32_cmpnltss:
7344*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7345*67e74705SXin Li case X86::BI__builtin_ia32_cmpnless:
7346*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7347*67e74705SXin Li case X86::BI__builtin_ia32_cmpordss:
7348*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7349*67e74705SXin Li case X86::BI__builtin_ia32_cmpeqsd:
7350*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7351*67e74705SXin Li case X86::BI__builtin_ia32_cmpltsd:
7352*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7353*67e74705SXin Li case X86::BI__builtin_ia32_cmplesd:
7354*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7355*67e74705SXin Li case X86::BI__builtin_ia32_cmpunordsd:
7356*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7357*67e74705SXin Li case X86::BI__builtin_ia32_cmpneqsd:
7358*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7359*67e74705SXin Li case X86::BI__builtin_ia32_cmpnltsd:
7360*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7361*67e74705SXin Li case X86::BI__builtin_ia32_cmpnlesd:
7362*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7363*67e74705SXin Li case X86::BI__builtin_ia32_cmpordsd:
7364*67e74705SXin Li return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7365*67e74705SXin Li }
7366*67e74705SXin Li }
7367*67e74705SXin Li
7368*67e74705SXin Li
EmitPPCBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7369*67e74705SXin Li Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7370*67e74705SXin Li const CallExpr *E) {
7371*67e74705SXin Li SmallVector<Value*, 4> Ops;
7372*67e74705SXin Li
7373*67e74705SXin Li for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7374*67e74705SXin Li Ops.push_back(EmitScalarExpr(E->getArg(i)));
7375*67e74705SXin Li
7376*67e74705SXin Li Intrinsic::ID ID = Intrinsic::not_intrinsic;
7377*67e74705SXin Li
7378*67e74705SXin Li switch (BuiltinID) {
7379*67e74705SXin Li default: return nullptr;
7380*67e74705SXin Li
7381*67e74705SXin Li // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7382*67e74705SXin Li // call __builtin_readcyclecounter.
7383*67e74705SXin Li case PPC::BI__builtin_ppc_get_timebase:
7384*67e74705SXin Li return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7385*67e74705SXin Li
7386*67e74705SXin Li // vec_ld, vec_lvsl, vec_lvsr
7387*67e74705SXin Li case PPC::BI__builtin_altivec_lvx:
7388*67e74705SXin Li case PPC::BI__builtin_altivec_lvxl:
7389*67e74705SXin Li case PPC::BI__builtin_altivec_lvebx:
7390*67e74705SXin Li case PPC::BI__builtin_altivec_lvehx:
7391*67e74705SXin Li case PPC::BI__builtin_altivec_lvewx:
7392*67e74705SXin Li case PPC::BI__builtin_altivec_lvsl:
7393*67e74705SXin Li case PPC::BI__builtin_altivec_lvsr:
7394*67e74705SXin Li case PPC::BI__builtin_vsx_lxvd2x:
7395*67e74705SXin Li case PPC::BI__builtin_vsx_lxvw4x:
7396*67e74705SXin Li {
7397*67e74705SXin Li Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7398*67e74705SXin Li
7399*67e74705SXin Li Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7400*67e74705SXin Li Ops.pop_back();
7401*67e74705SXin Li
7402*67e74705SXin Li switch (BuiltinID) {
7403*67e74705SXin Li default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7404*67e74705SXin Li case PPC::BI__builtin_altivec_lvx:
7405*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvx;
7406*67e74705SXin Li break;
7407*67e74705SXin Li case PPC::BI__builtin_altivec_lvxl:
7408*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvxl;
7409*67e74705SXin Li break;
7410*67e74705SXin Li case PPC::BI__builtin_altivec_lvebx:
7411*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvebx;
7412*67e74705SXin Li break;
7413*67e74705SXin Li case PPC::BI__builtin_altivec_lvehx:
7414*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvehx;
7415*67e74705SXin Li break;
7416*67e74705SXin Li case PPC::BI__builtin_altivec_lvewx:
7417*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvewx;
7418*67e74705SXin Li break;
7419*67e74705SXin Li case PPC::BI__builtin_altivec_lvsl:
7420*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvsl;
7421*67e74705SXin Li break;
7422*67e74705SXin Li case PPC::BI__builtin_altivec_lvsr:
7423*67e74705SXin Li ID = Intrinsic::ppc_altivec_lvsr;
7424*67e74705SXin Li break;
7425*67e74705SXin Li case PPC::BI__builtin_vsx_lxvd2x:
7426*67e74705SXin Li ID = Intrinsic::ppc_vsx_lxvd2x;
7427*67e74705SXin Li break;
7428*67e74705SXin Li case PPC::BI__builtin_vsx_lxvw4x:
7429*67e74705SXin Li ID = Intrinsic::ppc_vsx_lxvw4x;
7430*67e74705SXin Li break;
7431*67e74705SXin Li }
7432*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(ID);
7433*67e74705SXin Li return Builder.CreateCall(F, Ops, "");
7434*67e74705SXin Li }
7435*67e74705SXin Li
7436*67e74705SXin Li // vec_st
7437*67e74705SXin Li case PPC::BI__builtin_altivec_stvx:
7438*67e74705SXin Li case PPC::BI__builtin_altivec_stvxl:
7439*67e74705SXin Li case PPC::BI__builtin_altivec_stvebx:
7440*67e74705SXin Li case PPC::BI__builtin_altivec_stvehx:
7441*67e74705SXin Li case PPC::BI__builtin_altivec_stvewx:
7442*67e74705SXin Li case PPC::BI__builtin_vsx_stxvd2x:
7443*67e74705SXin Li case PPC::BI__builtin_vsx_stxvw4x:
7444*67e74705SXin Li {
7445*67e74705SXin Li Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
7446*67e74705SXin Li Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
7447*67e74705SXin Li Ops.pop_back();
7448*67e74705SXin Li
7449*67e74705SXin Li switch (BuiltinID) {
7450*67e74705SXin Li default: llvm_unreachable("Unsupported st intrinsic!");
7451*67e74705SXin Li case PPC::BI__builtin_altivec_stvx:
7452*67e74705SXin Li ID = Intrinsic::ppc_altivec_stvx;
7453*67e74705SXin Li break;
7454*67e74705SXin Li case PPC::BI__builtin_altivec_stvxl:
7455*67e74705SXin Li ID = Intrinsic::ppc_altivec_stvxl;
7456*67e74705SXin Li break;
7457*67e74705SXin Li case PPC::BI__builtin_altivec_stvebx:
7458*67e74705SXin Li ID = Intrinsic::ppc_altivec_stvebx;
7459*67e74705SXin Li break;
7460*67e74705SXin Li case PPC::BI__builtin_altivec_stvehx:
7461*67e74705SXin Li ID = Intrinsic::ppc_altivec_stvehx;
7462*67e74705SXin Li break;
7463*67e74705SXin Li case PPC::BI__builtin_altivec_stvewx:
7464*67e74705SXin Li ID = Intrinsic::ppc_altivec_stvewx;
7465*67e74705SXin Li break;
7466*67e74705SXin Li case PPC::BI__builtin_vsx_stxvd2x:
7467*67e74705SXin Li ID = Intrinsic::ppc_vsx_stxvd2x;
7468*67e74705SXin Li break;
7469*67e74705SXin Li case PPC::BI__builtin_vsx_stxvw4x:
7470*67e74705SXin Li ID = Intrinsic::ppc_vsx_stxvw4x;
7471*67e74705SXin Li break;
7472*67e74705SXin Li }
7473*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(ID);
7474*67e74705SXin Li return Builder.CreateCall(F, Ops, "");
7475*67e74705SXin Li }
7476*67e74705SXin Li // Square root
7477*67e74705SXin Li case PPC::BI__builtin_vsx_xvsqrtsp:
7478*67e74705SXin Li case PPC::BI__builtin_vsx_xvsqrtdp: {
7479*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7480*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7481*67e74705SXin Li ID = Intrinsic::sqrt;
7482*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7483*67e74705SXin Li return Builder.CreateCall(F, X);
7484*67e74705SXin Li }
7485*67e74705SXin Li // Count leading zeros
7486*67e74705SXin Li case PPC::BI__builtin_altivec_vclzb:
7487*67e74705SXin Li case PPC::BI__builtin_altivec_vclzh:
7488*67e74705SXin Li case PPC::BI__builtin_altivec_vclzw:
7489*67e74705SXin Li case PPC::BI__builtin_altivec_vclzd: {
7490*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7491*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7492*67e74705SXin Li Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7493*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7494*67e74705SXin Li return Builder.CreateCall(F, {X, Undef});
7495*67e74705SXin Li }
7496*67e74705SXin Li // Copy sign
7497*67e74705SXin Li case PPC::BI__builtin_vsx_xvcpsgnsp:
7498*67e74705SXin Li case PPC::BI__builtin_vsx_xvcpsgndp: {
7499*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7500*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7501*67e74705SXin Li Value *Y = EmitScalarExpr(E->getArg(1));
7502*67e74705SXin Li ID = Intrinsic::copysign;
7503*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7504*67e74705SXin Li return Builder.CreateCall(F, {X, Y});
7505*67e74705SXin Li }
7506*67e74705SXin Li // Rounding/truncation
7507*67e74705SXin Li case PPC::BI__builtin_vsx_xvrspip:
7508*67e74705SXin Li case PPC::BI__builtin_vsx_xvrdpip:
7509*67e74705SXin Li case PPC::BI__builtin_vsx_xvrdpim:
7510*67e74705SXin Li case PPC::BI__builtin_vsx_xvrspim:
7511*67e74705SXin Li case PPC::BI__builtin_vsx_xvrdpi:
7512*67e74705SXin Li case PPC::BI__builtin_vsx_xvrspi:
7513*67e74705SXin Li case PPC::BI__builtin_vsx_xvrdpic:
7514*67e74705SXin Li case PPC::BI__builtin_vsx_xvrspic:
7515*67e74705SXin Li case PPC::BI__builtin_vsx_xvrdpiz:
7516*67e74705SXin Li case PPC::BI__builtin_vsx_xvrspiz: {
7517*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7518*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7519*67e74705SXin Li if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
7520*67e74705SXin Li BuiltinID == PPC::BI__builtin_vsx_xvrspim)
7521*67e74705SXin Li ID = Intrinsic::floor;
7522*67e74705SXin Li else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
7523*67e74705SXin Li BuiltinID == PPC::BI__builtin_vsx_xvrspi)
7524*67e74705SXin Li ID = Intrinsic::round;
7525*67e74705SXin Li else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
7526*67e74705SXin Li BuiltinID == PPC::BI__builtin_vsx_xvrspic)
7527*67e74705SXin Li ID = Intrinsic::nearbyint;
7528*67e74705SXin Li else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
7529*67e74705SXin Li BuiltinID == PPC::BI__builtin_vsx_xvrspip)
7530*67e74705SXin Li ID = Intrinsic::ceil;
7531*67e74705SXin Li else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
7532*67e74705SXin Li BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
7533*67e74705SXin Li ID = Intrinsic::trunc;
7534*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7535*67e74705SXin Li return Builder.CreateCall(F, X);
7536*67e74705SXin Li }
7537*67e74705SXin Li
7538*67e74705SXin Li // Absolute value
7539*67e74705SXin Li case PPC::BI__builtin_vsx_xvabsdp:
7540*67e74705SXin Li case PPC::BI__builtin_vsx_xvabssp: {
7541*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7542*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7543*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7544*67e74705SXin Li return Builder.CreateCall(F, X);
7545*67e74705SXin Li }
7546*67e74705SXin Li
7547*67e74705SXin Li // FMA variations
7548*67e74705SXin Li case PPC::BI__builtin_vsx_xvmaddadp:
7549*67e74705SXin Li case PPC::BI__builtin_vsx_xvmaddasp:
7550*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmaddadp:
7551*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmaddasp:
7552*67e74705SXin Li case PPC::BI__builtin_vsx_xvmsubadp:
7553*67e74705SXin Li case PPC::BI__builtin_vsx_xvmsubasp:
7554*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmsubadp:
7555*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmsubasp: {
7556*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7557*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7558*67e74705SXin Li Value *Y = EmitScalarExpr(E->getArg(1));
7559*67e74705SXin Li Value *Z = EmitScalarExpr(E->getArg(2));
7560*67e74705SXin Li Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7561*67e74705SXin Li llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7562*67e74705SXin Li switch (BuiltinID) {
7563*67e74705SXin Li case PPC::BI__builtin_vsx_xvmaddadp:
7564*67e74705SXin Li case PPC::BI__builtin_vsx_xvmaddasp:
7565*67e74705SXin Li return Builder.CreateCall(F, {X, Y, Z});
7566*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmaddadp:
7567*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmaddasp:
7568*67e74705SXin Li return Builder.CreateFSub(Zero,
7569*67e74705SXin Li Builder.CreateCall(F, {X, Y, Z}), "sub");
7570*67e74705SXin Li case PPC::BI__builtin_vsx_xvmsubadp:
7571*67e74705SXin Li case PPC::BI__builtin_vsx_xvmsubasp:
7572*67e74705SXin Li return Builder.CreateCall(F,
7573*67e74705SXin Li {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7574*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmsubadp:
7575*67e74705SXin Li case PPC::BI__builtin_vsx_xvnmsubasp:
7576*67e74705SXin Li Value *FsubRes =
7577*67e74705SXin Li Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7578*67e74705SXin Li return Builder.CreateFSub(Zero, FsubRes, "sub");
7579*67e74705SXin Li }
7580*67e74705SXin Li llvm_unreachable("Unknown FMA operation");
7581*67e74705SXin Li return nullptr; // Suppress no-return warning
7582*67e74705SXin Li }
7583*67e74705SXin Li }
7584*67e74705SXin Li }
7585*67e74705SXin Li
EmitAMDGPUBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7586*67e74705SXin Li Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
7587*67e74705SXin Li const CallExpr *E) {
7588*67e74705SXin Li switch (BuiltinID) {
7589*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_div_scale:
7590*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_div_scalef: {
7591*67e74705SXin Li // Translate from the intrinsics's struct return to the builtin's out
7592*67e74705SXin Li // argument.
7593*67e74705SXin Li
7594*67e74705SXin Li Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
7595*67e74705SXin Li
7596*67e74705SXin Li llvm::Value *X = EmitScalarExpr(E->getArg(0));
7597*67e74705SXin Li llvm::Value *Y = EmitScalarExpr(E->getArg(1));
7598*67e74705SXin Li llvm::Value *Z = EmitScalarExpr(E->getArg(2));
7599*67e74705SXin Li
7600*67e74705SXin Li llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
7601*67e74705SXin Li X->getType());
7602*67e74705SXin Li
7603*67e74705SXin Li llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
7604*67e74705SXin Li
7605*67e74705SXin Li llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
7606*67e74705SXin Li llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
7607*67e74705SXin Li
7608*67e74705SXin Li llvm::Type *RealFlagType
7609*67e74705SXin Li = FlagOutPtr.getPointer()->getType()->getPointerElementType();
7610*67e74705SXin Li
7611*67e74705SXin Li llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
7612*67e74705SXin Li Builder.CreateStore(FlagExt, FlagOutPtr);
7613*67e74705SXin Li return Result;
7614*67e74705SXin Li }
7615*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_div_fmas:
7616*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
7617*67e74705SXin Li llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
7618*67e74705SXin Li llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
7619*67e74705SXin Li llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
7620*67e74705SXin Li llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
7621*67e74705SXin Li
7622*67e74705SXin Li llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
7623*67e74705SXin Li Src0->getType());
7624*67e74705SXin Li llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
7625*67e74705SXin Li return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
7626*67e74705SXin Li }
7627*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_div_fixup:
7628*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_div_fixupf:
7629*67e74705SXin Li return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
7630*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_trig_preop:
7631*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_trig_preopf:
7632*67e74705SXin Li return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
7633*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_rcp:
7634*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_rcpf:
7635*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
7636*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_rsq:
7637*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_rsqf:
7638*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7639*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
7640*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
7641*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
7642*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_sinf:
7643*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
7644*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_cosf:
7645*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
7646*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_log_clampf:
7647*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
7648*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_ldexp:
7649*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_ldexpf:
7650*67e74705SXin Li return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7651*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_frexp_mant:
7652*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
7653*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
7654*67e74705SXin Li }
7655*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_frexp_exp:
7656*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
7657*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
7658*67e74705SXin Li }
7659*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_fract:
7660*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_fractf:
7661*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
7662*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_class:
7663*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_classf:
7664*67e74705SXin Li return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
7665*67e74705SXin Li
7666*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_read_exec: {
7667*67e74705SXin Li CallInst *CI = cast<CallInst>(
7668*67e74705SXin Li EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
7669*67e74705SXin Li CI->setConvergent();
7670*67e74705SXin Li return CI;
7671*67e74705SXin Li }
7672*67e74705SXin Li // Legacy amdgpu prefix
7673*67e74705SXin Li case AMDGPU::BI__builtin_amdgpu_rsq:
7674*67e74705SXin Li case AMDGPU::BI__builtin_amdgpu_rsqf: {
7675*67e74705SXin Li if (getTarget().getTriple().getArch() == Triple::amdgcn)
7676*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7677*67e74705SXin Li return emitUnaryBuiltin(*this, E, Intrinsic::r600_rsq);
7678*67e74705SXin Li }
7679*67e74705SXin Li case AMDGPU::BI__builtin_amdgpu_ldexp:
7680*67e74705SXin Li case AMDGPU::BI__builtin_amdgpu_ldexpf: {
7681*67e74705SXin Li if (getTarget().getTriple().getArch() == Triple::amdgcn)
7682*67e74705SXin Li return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7683*67e74705SXin Li return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
7684*67e74705SXin Li }
7685*67e74705SXin Li
7686*67e74705SXin Li // amdgcn workitem
7687*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
7688*67e74705SXin Li return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
7689*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
7690*67e74705SXin Li return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
7691*67e74705SXin Li case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
7692*67e74705SXin Li return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
7693*67e74705SXin Li
7694*67e74705SXin Li // r600 workitem
7695*67e74705SXin Li case AMDGPU::BI__builtin_r600_read_tidig_x:
7696*67e74705SXin Li return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
7697*67e74705SXin Li case AMDGPU::BI__builtin_r600_read_tidig_y:
7698*67e74705SXin Li return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
7699*67e74705SXin Li case AMDGPU::BI__builtin_r600_read_tidig_z:
7700*67e74705SXin Li return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
7701*67e74705SXin Li default:
7702*67e74705SXin Li return nullptr;
7703*67e74705SXin Li }
7704*67e74705SXin Li }
7705*67e74705SXin Li
7706*67e74705SXin Li /// Handle a SystemZ function in which the final argument is a pointer
7707*67e74705SXin Li /// to an int that receives the post-instruction CC value. At the LLVM level
7708*67e74705SXin Li /// this is represented as a function that returns a {result, cc} pair.
EmitSystemZIntrinsicWithCC(CodeGenFunction & CGF,unsigned IntrinsicID,const CallExpr * E)7709*67e74705SXin Li static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
7710*67e74705SXin Li unsigned IntrinsicID,
7711*67e74705SXin Li const CallExpr *E) {
7712*67e74705SXin Li unsigned NumArgs = E->getNumArgs() - 1;
7713*67e74705SXin Li SmallVector<Value *, 8> Args(NumArgs);
7714*67e74705SXin Li for (unsigned I = 0; I < NumArgs; ++I)
7715*67e74705SXin Li Args[I] = CGF.EmitScalarExpr(E->getArg(I));
7716*67e74705SXin Li Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
7717*67e74705SXin Li Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
7718*67e74705SXin Li Value *Call = CGF.Builder.CreateCall(F, Args);
7719*67e74705SXin Li Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
7720*67e74705SXin Li CGF.Builder.CreateStore(CC, CCPtr);
7721*67e74705SXin Li return CGF.Builder.CreateExtractValue(Call, 0);
7722*67e74705SXin Li }
7723*67e74705SXin Li
EmitSystemZBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7724*67e74705SXin Li Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
7725*67e74705SXin Li const CallExpr *E) {
7726*67e74705SXin Li switch (BuiltinID) {
7727*67e74705SXin Li case SystemZ::BI__builtin_tbegin: {
7728*67e74705SXin Li Value *TDB = EmitScalarExpr(E->getArg(0));
7729*67e74705SXin Li Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7730*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
7731*67e74705SXin Li return Builder.CreateCall(F, {TDB, Control});
7732*67e74705SXin Li }
7733*67e74705SXin Li case SystemZ::BI__builtin_tbegin_nofloat: {
7734*67e74705SXin Li Value *TDB = EmitScalarExpr(E->getArg(0));
7735*67e74705SXin Li Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7736*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
7737*67e74705SXin Li return Builder.CreateCall(F, {TDB, Control});
7738*67e74705SXin Li }
7739*67e74705SXin Li case SystemZ::BI__builtin_tbeginc: {
7740*67e74705SXin Li Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
7741*67e74705SXin Li Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
7742*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
7743*67e74705SXin Li return Builder.CreateCall(F, {TDB, Control});
7744*67e74705SXin Li }
7745*67e74705SXin Li case SystemZ::BI__builtin_tabort: {
7746*67e74705SXin Li Value *Data = EmitScalarExpr(E->getArg(0));
7747*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
7748*67e74705SXin Li return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
7749*67e74705SXin Li }
7750*67e74705SXin Li case SystemZ::BI__builtin_non_tx_store: {
7751*67e74705SXin Li Value *Address = EmitScalarExpr(E->getArg(0));
7752*67e74705SXin Li Value *Data = EmitScalarExpr(E->getArg(1));
7753*67e74705SXin Li Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7754*67e74705SXin Li return Builder.CreateCall(F, {Data, Address});
7755*67e74705SXin Li }
7756*67e74705SXin Li
7757*67e74705SXin Li // Vector builtins. Note that most vector builtins are mapped automatically
7758*67e74705SXin Li // to target-specific LLVM intrinsics. The ones handled specially here can
7759*67e74705SXin Li // be represented via standard LLVM IR, which is preferable to enable common
7760*67e74705SXin Li // LLVM optimizations.
7761*67e74705SXin Li
7762*67e74705SXin Li case SystemZ::BI__builtin_s390_vpopctb:
7763*67e74705SXin Li case SystemZ::BI__builtin_s390_vpopcth:
7764*67e74705SXin Li case SystemZ::BI__builtin_s390_vpopctf:
7765*67e74705SXin Li case SystemZ::BI__builtin_s390_vpopctg: {
7766*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7767*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7768*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7769*67e74705SXin Li return Builder.CreateCall(F, X);
7770*67e74705SXin Li }
7771*67e74705SXin Li
7772*67e74705SXin Li case SystemZ::BI__builtin_s390_vclzb:
7773*67e74705SXin Li case SystemZ::BI__builtin_s390_vclzh:
7774*67e74705SXin Li case SystemZ::BI__builtin_s390_vclzf:
7775*67e74705SXin Li case SystemZ::BI__builtin_s390_vclzg: {
7776*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7777*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7778*67e74705SXin Li Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7779*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7780*67e74705SXin Li return Builder.CreateCall(F, {X, Undef});
7781*67e74705SXin Li }
7782*67e74705SXin Li
7783*67e74705SXin Li case SystemZ::BI__builtin_s390_vctzb:
7784*67e74705SXin Li case SystemZ::BI__builtin_s390_vctzh:
7785*67e74705SXin Li case SystemZ::BI__builtin_s390_vctzf:
7786*67e74705SXin Li case SystemZ::BI__builtin_s390_vctzg: {
7787*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7788*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7789*67e74705SXin Li Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7790*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7791*67e74705SXin Li return Builder.CreateCall(F, {X, Undef});
7792*67e74705SXin Li }
7793*67e74705SXin Li
7794*67e74705SXin Li case SystemZ::BI__builtin_s390_vfsqdb: {
7795*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7796*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7797*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7798*67e74705SXin Li return Builder.CreateCall(F, X);
7799*67e74705SXin Li }
7800*67e74705SXin Li case SystemZ::BI__builtin_s390_vfmadb: {
7801*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7802*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7803*67e74705SXin Li Value *Y = EmitScalarExpr(E->getArg(1));
7804*67e74705SXin Li Value *Z = EmitScalarExpr(E->getArg(2));
7805*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7806*67e74705SXin Li return Builder.CreateCall(F, {X, Y, Z});
7807*67e74705SXin Li }
7808*67e74705SXin Li case SystemZ::BI__builtin_s390_vfmsdb: {
7809*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7810*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7811*67e74705SXin Li Value *Y = EmitScalarExpr(E->getArg(1));
7812*67e74705SXin Li Value *Z = EmitScalarExpr(E->getArg(2));
7813*67e74705SXin Li Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7814*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7815*67e74705SXin Li return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7816*67e74705SXin Li }
7817*67e74705SXin Li case SystemZ::BI__builtin_s390_vflpdb: {
7818*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7819*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7820*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7821*67e74705SXin Li return Builder.CreateCall(F, X);
7822*67e74705SXin Li }
7823*67e74705SXin Li case SystemZ::BI__builtin_s390_vflndb: {
7824*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7825*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7826*67e74705SXin Li Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7827*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7828*67e74705SXin Li return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7829*67e74705SXin Li }
7830*67e74705SXin Li case SystemZ::BI__builtin_s390_vfidb: {
7831*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
7832*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
7833*67e74705SXin Li // Constant-fold the M4 and M5 mask arguments.
7834*67e74705SXin Li llvm::APSInt M4, M5;
7835*67e74705SXin Li bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7836*67e74705SXin Li bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7837*67e74705SXin Li assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7838*67e74705SXin Li (void)IsConstM4; (void)IsConstM5;
7839*67e74705SXin Li // Check whether this instance of vfidb can be represented via a LLVM
7840*67e74705SXin Li // standard intrinsic. We only support some combinations of M4 and M5.
7841*67e74705SXin Li Intrinsic::ID ID = Intrinsic::not_intrinsic;
7842*67e74705SXin Li switch (M4.getZExtValue()) {
7843*67e74705SXin Li default: break;
7844*67e74705SXin Li case 0: // IEEE-inexact exception allowed
7845*67e74705SXin Li switch (M5.getZExtValue()) {
7846*67e74705SXin Li default: break;
7847*67e74705SXin Li case 0: ID = Intrinsic::rint; break;
7848*67e74705SXin Li }
7849*67e74705SXin Li break;
7850*67e74705SXin Li case 4: // IEEE-inexact exception suppressed
7851*67e74705SXin Li switch (M5.getZExtValue()) {
7852*67e74705SXin Li default: break;
7853*67e74705SXin Li case 0: ID = Intrinsic::nearbyint; break;
7854*67e74705SXin Li case 1: ID = Intrinsic::round; break;
7855*67e74705SXin Li case 5: ID = Intrinsic::trunc; break;
7856*67e74705SXin Li case 6: ID = Intrinsic::ceil; break;
7857*67e74705SXin Li case 7: ID = Intrinsic::floor; break;
7858*67e74705SXin Li }
7859*67e74705SXin Li break;
7860*67e74705SXin Li }
7861*67e74705SXin Li if (ID != Intrinsic::not_intrinsic) {
7862*67e74705SXin Li Function *F = CGM.getIntrinsic(ID, ResultType);
7863*67e74705SXin Li return Builder.CreateCall(F, X);
7864*67e74705SXin Li }
7865*67e74705SXin Li Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7866*67e74705SXin Li Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7867*67e74705SXin Li Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7868*67e74705SXin Li return Builder.CreateCall(F, {X, M4Value, M5Value});
7869*67e74705SXin Li }
7870*67e74705SXin Li
7871*67e74705SXin Li // Vector intrisincs that output the post-instruction CC value.
7872*67e74705SXin Li
7873*67e74705SXin Li #define INTRINSIC_WITH_CC(NAME) \
7874*67e74705SXin Li case SystemZ::BI__builtin_##NAME: \
7875*67e74705SXin Li return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7876*67e74705SXin Li
7877*67e74705SXin Li INTRINSIC_WITH_CC(s390_vpkshs);
7878*67e74705SXin Li INTRINSIC_WITH_CC(s390_vpksfs);
7879*67e74705SXin Li INTRINSIC_WITH_CC(s390_vpksgs);
7880*67e74705SXin Li
7881*67e74705SXin Li INTRINSIC_WITH_CC(s390_vpklshs);
7882*67e74705SXin Li INTRINSIC_WITH_CC(s390_vpklsfs);
7883*67e74705SXin Li INTRINSIC_WITH_CC(s390_vpklsgs);
7884*67e74705SXin Li
7885*67e74705SXin Li INTRINSIC_WITH_CC(s390_vceqbs);
7886*67e74705SXin Li INTRINSIC_WITH_CC(s390_vceqhs);
7887*67e74705SXin Li INTRINSIC_WITH_CC(s390_vceqfs);
7888*67e74705SXin Li INTRINSIC_WITH_CC(s390_vceqgs);
7889*67e74705SXin Li
7890*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchbs);
7891*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchhs);
7892*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchfs);
7893*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchgs);
7894*67e74705SXin Li
7895*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchlbs);
7896*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchlhs);
7897*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchlfs);
7898*67e74705SXin Li INTRINSIC_WITH_CC(s390_vchlgs);
7899*67e74705SXin Li
7900*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfaebs);
7901*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfaehs);
7902*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfaefs);
7903*67e74705SXin Li
7904*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfaezbs);
7905*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfaezhs);
7906*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfaezfs);
7907*67e74705SXin Li
7908*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfeebs);
7909*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfeehs);
7910*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfeefs);
7911*67e74705SXin Li
7912*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfeezbs);
7913*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfeezhs);
7914*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfeezfs);
7915*67e74705SXin Li
7916*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfenebs);
7917*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfenehs);
7918*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfenefs);
7919*67e74705SXin Li
7920*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfenezbs);
7921*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfenezhs);
7922*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfenezfs);
7923*67e74705SXin Li
7924*67e74705SXin Li INTRINSIC_WITH_CC(s390_vistrbs);
7925*67e74705SXin Li INTRINSIC_WITH_CC(s390_vistrhs);
7926*67e74705SXin Li INTRINSIC_WITH_CC(s390_vistrfs);
7927*67e74705SXin Li
7928*67e74705SXin Li INTRINSIC_WITH_CC(s390_vstrcbs);
7929*67e74705SXin Li INTRINSIC_WITH_CC(s390_vstrchs);
7930*67e74705SXin Li INTRINSIC_WITH_CC(s390_vstrcfs);
7931*67e74705SXin Li
7932*67e74705SXin Li INTRINSIC_WITH_CC(s390_vstrczbs);
7933*67e74705SXin Li INTRINSIC_WITH_CC(s390_vstrczhs);
7934*67e74705SXin Li INTRINSIC_WITH_CC(s390_vstrczfs);
7935*67e74705SXin Li
7936*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfcedbs);
7937*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfchdbs);
7938*67e74705SXin Li INTRINSIC_WITH_CC(s390_vfchedbs);
7939*67e74705SXin Li
7940*67e74705SXin Li INTRINSIC_WITH_CC(s390_vftcidb);
7941*67e74705SXin Li
7942*67e74705SXin Li #undef INTRINSIC_WITH_CC
7943*67e74705SXin Li
7944*67e74705SXin Li default:
7945*67e74705SXin Li return nullptr;
7946*67e74705SXin Li }
7947*67e74705SXin Li }
7948*67e74705SXin Li
EmitNVPTXBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7949*67e74705SXin Li Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7950*67e74705SXin Li const CallExpr *E) {
7951*67e74705SXin Li auto MakeLdg = [&](unsigned IntrinsicID) {
7952*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
7953*67e74705SXin Li AlignmentSource AlignSource;
7954*67e74705SXin Li clang::CharUnits Align =
7955*67e74705SXin Li getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
7956*67e74705SXin Li return Builder.CreateCall(
7957*67e74705SXin Li CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
7958*67e74705SXin Li Ptr->getType()}),
7959*67e74705SXin Li {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
7960*67e74705SXin Li };
7961*67e74705SXin Li
7962*67e74705SXin Li switch (BuiltinID) {
7963*67e74705SXin Li case NVPTX::BI__nvvm_atom_add_gen_i:
7964*67e74705SXin Li case NVPTX::BI__nvvm_atom_add_gen_l:
7965*67e74705SXin Li case NVPTX::BI__nvvm_atom_add_gen_ll:
7966*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7967*67e74705SXin Li
7968*67e74705SXin Li case NVPTX::BI__nvvm_atom_sub_gen_i:
7969*67e74705SXin Li case NVPTX::BI__nvvm_atom_sub_gen_l:
7970*67e74705SXin Li case NVPTX::BI__nvvm_atom_sub_gen_ll:
7971*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
7972*67e74705SXin Li
7973*67e74705SXin Li case NVPTX::BI__nvvm_atom_and_gen_i:
7974*67e74705SXin Li case NVPTX::BI__nvvm_atom_and_gen_l:
7975*67e74705SXin Li case NVPTX::BI__nvvm_atom_and_gen_ll:
7976*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
7977*67e74705SXin Li
7978*67e74705SXin Li case NVPTX::BI__nvvm_atom_or_gen_i:
7979*67e74705SXin Li case NVPTX::BI__nvvm_atom_or_gen_l:
7980*67e74705SXin Li case NVPTX::BI__nvvm_atom_or_gen_ll:
7981*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
7982*67e74705SXin Li
7983*67e74705SXin Li case NVPTX::BI__nvvm_atom_xor_gen_i:
7984*67e74705SXin Li case NVPTX::BI__nvvm_atom_xor_gen_l:
7985*67e74705SXin Li case NVPTX::BI__nvvm_atom_xor_gen_ll:
7986*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
7987*67e74705SXin Li
7988*67e74705SXin Li case NVPTX::BI__nvvm_atom_xchg_gen_i:
7989*67e74705SXin Li case NVPTX::BI__nvvm_atom_xchg_gen_l:
7990*67e74705SXin Li case NVPTX::BI__nvvm_atom_xchg_gen_ll:
7991*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
7992*67e74705SXin Li
7993*67e74705SXin Li case NVPTX::BI__nvvm_atom_max_gen_i:
7994*67e74705SXin Li case NVPTX::BI__nvvm_atom_max_gen_l:
7995*67e74705SXin Li case NVPTX::BI__nvvm_atom_max_gen_ll:
7996*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
7997*67e74705SXin Li
7998*67e74705SXin Li case NVPTX::BI__nvvm_atom_max_gen_ui:
7999*67e74705SXin Li case NVPTX::BI__nvvm_atom_max_gen_ul:
8000*67e74705SXin Li case NVPTX::BI__nvvm_atom_max_gen_ull:
8001*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8002*67e74705SXin Li
8003*67e74705SXin Li case NVPTX::BI__nvvm_atom_min_gen_i:
8004*67e74705SXin Li case NVPTX::BI__nvvm_atom_min_gen_l:
8005*67e74705SXin Li case NVPTX::BI__nvvm_atom_min_gen_ll:
8006*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8007*67e74705SXin Li
8008*67e74705SXin Li case NVPTX::BI__nvvm_atom_min_gen_ui:
8009*67e74705SXin Li case NVPTX::BI__nvvm_atom_min_gen_ul:
8010*67e74705SXin Li case NVPTX::BI__nvvm_atom_min_gen_ull:
8011*67e74705SXin Li return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8012*67e74705SXin Li
8013*67e74705SXin Li case NVPTX::BI__nvvm_atom_cas_gen_i:
8014*67e74705SXin Li case NVPTX::BI__nvvm_atom_cas_gen_l:
8015*67e74705SXin Li case NVPTX::BI__nvvm_atom_cas_gen_ll:
8016*67e74705SXin Li // __nvvm_atom_cas_gen_* should return the old value rather than the
8017*67e74705SXin Li // success flag.
8018*67e74705SXin Li return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8019*67e74705SXin Li
8020*67e74705SXin Li case NVPTX::BI__nvvm_atom_add_gen_f: {
8021*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
8022*67e74705SXin Li Value *Val = EmitScalarExpr(E->getArg(1));
8023*67e74705SXin Li // atomicrmw only deals with integer arguments so we need to use
8024*67e74705SXin Li // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
8025*67e74705SXin Li Value *FnALAF32 =
8026*67e74705SXin Li CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8027*67e74705SXin Li return Builder.CreateCall(FnALAF32, {Ptr, Val});
8028*67e74705SXin Li }
8029*67e74705SXin Li
8030*67e74705SXin Li case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8031*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
8032*67e74705SXin Li Value *Val = EmitScalarExpr(E->getArg(1));
8033*67e74705SXin Li Value *FnALI32 =
8034*67e74705SXin Li CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8035*67e74705SXin Li return Builder.CreateCall(FnALI32, {Ptr, Val});
8036*67e74705SXin Li }
8037*67e74705SXin Li
8038*67e74705SXin Li case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8039*67e74705SXin Li Value *Ptr = EmitScalarExpr(E->getArg(0));
8040*67e74705SXin Li Value *Val = EmitScalarExpr(E->getArg(1));
8041*67e74705SXin Li Value *FnALD32 =
8042*67e74705SXin Li CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8043*67e74705SXin Li return Builder.CreateCall(FnALD32, {Ptr, Val});
8044*67e74705SXin Li }
8045*67e74705SXin Li
8046*67e74705SXin Li case NVPTX::BI__nvvm_ldg_c:
8047*67e74705SXin Li case NVPTX::BI__nvvm_ldg_c2:
8048*67e74705SXin Li case NVPTX::BI__nvvm_ldg_c4:
8049*67e74705SXin Li case NVPTX::BI__nvvm_ldg_s:
8050*67e74705SXin Li case NVPTX::BI__nvvm_ldg_s2:
8051*67e74705SXin Li case NVPTX::BI__nvvm_ldg_s4:
8052*67e74705SXin Li case NVPTX::BI__nvvm_ldg_i:
8053*67e74705SXin Li case NVPTX::BI__nvvm_ldg_i2:
8054*67e74705SXin Li case NVPTX::BI__nvvm_ldg_i4:
8055*67e74705SXin Li case NVPTX::BI__nvvm_ldg_l:
8056*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ll:
8057*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ll2:
8058*67e74705SXin Li case NVPTX::BI__nvvm_ldg_uc:
8059*67e74705SXin Li case NVPTX::BI__nvvm_ldg_uc2:
8060*67e74705SXin Li case NVPTX::BI__nvvm_ldg_uc4:
8061*67e74705SXin Li case NVPTX::BI__nvvm_ldg_us:
8062*67e74705SXin Li case NVPTX::BI__nvvm_ldg_us2:
8063*67e74705SXin Li case NVPTX::BI__nvvm_ldg_us4:
8064*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ui:
8065*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ui2:
8066*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ui4:
8067*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ul:
8068*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ull:
8069*67e74705SXin Li case NVPTX::BI__nvvm_ldg_ull2:
8070*67e74705SXin Li // PTX Interoperability section 2.2: "For a vector with an even number of
8071*67e74705SXin Li // elements, its alignment is set to number of elements times the alignment
8072*67e74705SXin Li // of its member: n*alignof(t)."
8073*67e74705SXin Li return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8074*67e74705SXin Li case NVPTX::BI__nvvm_ldg_f:
8075*67e74705SXin Li case NVPTX::BI__nvvm_ldg_f2:
8076*67e74705SXin Li case NVPTX::BI__nvvm_ldg_f4:
8077*67e74705SXin Li case NVPTX::BI__nvvm_ldg_d:
8078*67e74705SXin Li case NVPTX::BI__nvvm_ldg_d2:
8079*67e74705SXin Li return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8080*67e74705SXin Li default:
8081*67e74705SXin Li return nullptr;
8082*67e74705SXin Li }
8083*67e74705SXin Li }
8084*67e74705SXin Li
EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,const CallExpr * E)8085*67e74705SXin Li Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8086*67e74705SXin Li const CallExpr *E) {
8087*67e74705SXin Li switch (BuiltinID) {
8088*67e74705SXin Li case WebAssembly::BI__builtin_wasm_current_memory: {
8089*67e74705SXin Li llvm::Type *ResultType = ConvertType(E->getType());
8090*67e74705SXin Li Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8091*67e74705SXin Li return Builder.CreateCall(Callee);
8092*67e74705SXin Li }
8093*67e74705SXin Li case WebAssembly::BI__builtin_wasm_grow_memory: {
8094*67e74705SXin Li Value *X = EmitScalarExpr(E->getArg(0));
8095*67e74705SXin Li Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8096*67e74705SXin Li return Builder.CreateCall(Callee, X);
8097*67e74705SXin Li }
8098*67e74705SXin Li
8099*67e74705SXin Li default:
8100*67e74705SXin Li return nullptr;
8101*67e74705SXin Li }
8102*67e74705SXin Li }
8103