xref: /aosp_15_r20/frameworks/rs/cpu_ref/rsCpuIntrinsicBLAS.cpp (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker /*
2*e1eccf28SAndroid Build Coastguard Worker  * Copyright (C) 2012 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker  *
4*e1eccf28SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker  *
8*e1eccf28SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker  *
10*e1eccf28SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker  * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker  */
16*e1eccf28SAndroid Build Coastguard Worker 
17*e1eccf28SAndroid Build Coastguard Worker 
18*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuIntrinsic.h"
19*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuIntrinsicInlines.h"
20*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuBLASDispatch.h"
21*e1eccf28SAndroid Build Coastguard Worker #include "eight_bit_int_gemm.h"
22*e1eccf28SAndroid Build Coastguard Worker 
23*e1eccf28SAndroid Build Coastguard Worker namespace android {
24*e1eccf28SAndroid Build Coastguard Worker namespace renderscript {
25*e1eccf28SAndroid Build Coastguard Worker 
26*e1eccf28SAndroid Build Coastguard Worker 
27*e1eccf28SAndroid Build Coastguard Worker class RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic {
28*e1eccf28SAndroid Build Coastguard Worker public:
29*e1eccf28SAndroid Build Coastguard Worker     void invokeForEach(uint32_t slot,
30*e1eccf28SAndroid Build Coastguard Worker                        const Allocation ** ain,
31*e1eccf28SAndroid Build Coastguard Worker                        uint32_t inLen,
32*e1eccf28SAndroid Build Coastguard Worker                        Allocation * aout,
33*e1eccf28SAndroid Build Coastguard Worker                        const void * usr,
34*e1eccf28SAndroid Build Coastguard Worker                        uint32_t usrLen,
35*e1eccf28SAndroid Build Coastguard Worker                        const RsScriptCall *sc) override;
36*e1eccf28SAndroid Build Coastguard Worker     void populateScript(Script *) override;
37*e1eccf28SAndroid Build Coastguard Worker     ~RsdCpuScriptIntrinsicBLAS() override;
38*e1eccf28SAndroid Build Coastguard Worker     RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s);
39*e1eccf28SAndroid Build Coastguard Worker 
40*e1eccf28SAndroid Build Coastguard Worker protected:
41*e1eccf28SAndroid Build Coastguard Worker 
42*e1eccf28SAndroid Build Coastguard Worker     uint8_t a_offset = 0;
43*e1eccf28SAndroid Build Coastguard Worker     uint8_t b_offset = 0;
44*e1eccf28SAndroid Build Coastguard Worker     uint8_t c_offset = 0;
45*e1eccf28SAndroid Build Coastguard Worker 
46*e1eccf28SAndroid Build Coastguard Worker #ifdef RS_COMPATIBILITY_LIB
47*e1eccf28SAndroid Build Coastguard Worker     bool isBlasLibInitialized = false;
48*e1eccf28SAndroid Build Coastguard Worker #endif
49*e1eccf28SAndroid Build Coastguard Worker     static void kernelBNNM(size_t m, size_t n, size_t k,
50*e1eccf28SAndroid Build Coastguard Worker                            const uint8_t* a, uint8_t a_offset, size_t lda,
51*e1eccf28SAndroid Build Coastguard Worker                            const uint8_t* b, uint8_t b_offset, size_t ldb,
52*e1eccf28SAndroid Build Coastguard Worker                            uint8_t* c, int32_t c_offset, size_t ldc,
53*e1eccf28SAndroid Build Coastguard Worker                            int32_t c_mult_int);
54*e1eccf28SAndroid Build Coastguard Worker 
55*e1eccf28SAndroid Build Coastguard Worker 
56*e1eccf28SAndroid Build Coastguard Worker 
57*e1eccf28SAndroid Build Coastguard Worker };
58*e1eccf28SAndroid Build Coastguard Worker 
populateScript(Script * s)59*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicBLAS::populateScript(Script *s) {
60*e1eccf28SAndroid Build Coastguard Worker     s->mHal.info.exportedVariableCount = 0;
61*e1eccf28SAndroid Build Coastguard Worker }
62*e1eccf28SAndroid Build Coastguard Worker 
initABC(const Allocation ** ain,size_t size,void ** A,void ** B,void ** C,int * lda,int * ldb,int * ldc)63*e1eccf28SAndroid Build Coastguard Worker static void initABC(const Allocation ** ain,
64*e1eccf28SAndroid Build Coastguard Worker                     size_t size,
65*e1eccf28SAndroid Build Coastguard Worker                     void** A,
66*e1eccf28SAndroid Build Coastguard Worker                     void** B,
67*e1eccf28SAndroid Build Coastguard Worker                     void** C,
68*e1eccf28SAndroid Build Coastguard Worker                     int* lda,
69*e1eccf28SAndroid Build Coastguard Worker                     int* ldb,
70*e1eccf28SAndroid Build Coastguard Worker                     int* ldc)
71*e1eccf28SAndroid Build Coastguard Worker {
72*e1eccf28SAndroid Build Coastguard Worker     if (ain[0]) {
73*e1eccf28SAndroid Build Coastguard Worker         *A = ain[0]->mHal.drvState.lod[0].mallocPtr;
74*e1eccf28SAndroid Build Coastguard Worker         *lda = (int)(ain[0]->mHal.drvState.lod[0].stride/size);
75*e1eccf28SAndroid Build Coastguard Worker     }
76*e1eccf28SAndroid Build Coastguard Worker     if (ain[1]) {
77*e1eccf28SAndroid Build Coastguard Worker         *B = ain[1]->mHal.drvState.lod[0].mallocPtr;
78*e1eccf28SAndroid Build Coastguard Worker         *ldb = (int)(ain[1]->mHal.drvState.lod[0].stride/size);
79*e1eccf28SAndroid Build Coastguard Worker     }
80*e1eccf28SAndroid Build Coastguard Worker     if (ain[2]) {
81*e1eccf28SAndroid Build Coastguard Worker         *C = ain[2]->mHal.drvState.lod[0].mallocPtr;
82*e1eccf28SAndroid Build Coastguard Worker         *ldc = (int)(ain[2]->mHal.drvState.lod[0].stride/size);
83*e1eccf28SAndroid Build Coastguard Worker     }
84*e1eccf28SAndroid Build Coastguard Worker }
85*e1eccf28SAndroid Build Coastguard Worker 
86*e1eccf28SAndroid Build Coastguard Worker // Routine to setup LaunchStruct for GEMM callback.
setupGEMM(MTLaunchStructForEachBlas * mtls,const Allocation ** ain,RsBlasCall * call,RsdCpuReferenceImpl * ctx)87*e1eccf28SAndroid Build Coastguard Worker static void setupGEMM(MTLaunchStructForEachBlas *mtls, const Allocation **ain, RsBlasCall* call,
88*e1eccf28SAndroid Build Coastguard Worker                       RsdCpuReferenceImpl *ctx) {
89*e1eccf28SAndroid Build Coastguard Worker     uint32_t mm, nn, kk;
90*e1eccf28SAndroid Build Coastguard Worker     mm = call->M;
91*e1eccf28SAndroid Build Coastguard Worker     nn = call->N;
92*e1eccf28SAndroid Build Coastguard Worker     kk = call->K;
93*e1eccf28SAndroid Build Coastguard Worker 
94*e1eccf28SAndroid Build Coastguard Worker     memset(mtls, 0, sizeof(MTLaunchStructForEachBlas));
95*e1eccf28SAndroid Build Coastguard Worker     mtls->rs        = ctx;
96*e1eccf28SAndroid Build Coastguard Worker     mtls->sc        = call;
97*e1eccf28SAndroid Build Coastguard Worker     mtls->dimPtr    = &mtls->fep.dim;
98*e1eccf28SAndroid Build Coastguard Worker     mtls->fep.dim.x = nn;
99*e1eccf28SAndroid Build Coastguard Worker     mtls->fep.dim.y = mm;
100*e1eccf28SAndroid Build Coastguard Worker     mtls->fep.dim.z = kk;
101*e1eccf28SAndroid Build Coastguard Worker     if (ain) {
102*e1eccf28SAndroid Build Coastguard Worker         memcpy(mtls->ains, ain, 3 * sizeof(ain[0]));
103*e1eccf28SAndroid Build Coastguard Worker     }
104*e1eccf28SAndroid Build Coastguard Worker     uint32_t elementBytes = 4;
105*e1eccf28SAndroid Build Coastguard Worker     if (ain[0]) {
106*e1eccf28SAndroid Build Coastguard Worker         elementBytes = ain[0]->getType()->getElement()->getSizeBytes();
107*e1eccf28SAndroid Build Coastguard Worker     }
108*e1eccf28SAndroid Build Coastguard Worker     const uint32_t MIN_SIZE_TO_TILE = 64 * 1024 / elementBytes;
109*e1eccf28SAndroid Build Coastguard Worker     const uint32_t MAX_WORK_PER_THREAD = 512 / elementBytes;
110*e1eccf28SAndroid Build Coastguard Worker     const uint32_t THREAD_COUNT = ctx->getThreadCount();
111*e1eccf28SAndroid Build Coastguard Worker     uint32_t tileSizeN = 0;
112*e1eccf28SAndroid Build Coastguard Worker     uint32_t tileSizeM = 0;
113*e1eccf28SAndroid Build Coastguard Worker 
114*e1eccf28SAndroid Build Coastguard Worker     // Do not tile the matrix if:
115*e1eccf28SAndroid Build Coastguard Worker     // 1. It is too small comparing to the other matrix.
116*e1eccf28SAndroid Build Coastguard Worker     // 2. It is too small comparing to MIN_SIZE_TO_TILE .
117*e1eccf28SAndroid Build Coastguard Worker     if (nn * kk > MIN_SIZE_TO_TILE && nn * THREAD_COUNT > mm) {
118*e1eccf28SAndroid Build Coastguard Worker         tileSizeN = rsMin(nn / THREAD_COUNT, MAX_WORK_PER_THREAD);
119*e1eccf28SAndroid Build Coastguard Worker     }
120*e1eccf28SAndroid Build Coastguard Worker     if (mm * kk > MIN_SIZE_TO_TILE && mm * THREAD_COUNT > nn) {
121*e1eccf28SAndroid Build Coastguard Worker         tileSizeM = rsMin(mm / THREAD_COUNT, MAX_WORK_PER_THREAD);
122*e1eccf28SAndroid Build Coastguard Worker     }
123*e1eccf28SAndroid Build Coastguard Worker     mtls->numTileM = 1;
124*e1eccf28SAndroid Build Coastguard Worker     mtls->numTileN = 1;
125*e1eccf28SAndroid Build Coastguard Worker     mtls->tileSizeM = mm;
126*e1eccf28SAndroid Build Coastguard Worker     mtls->tileSizeN = nn;
127*e1eccf28SAndroid Build Coastguard Worker 
128*e1eccf28SAndroid Build Coastguard Worker     // If tiling is needed, compute the number of slices for A & B.
129*e1eccf28SAndroid Build Coastguard Worker     mtls->isThreadable = (tileSizeM > 0 || tileSizeN > 0);
130*e1eccf28SAndroid Build Coastguard Worker     if (tileSizeM) {
131*e1eccf28SAndroid Build Coastguard Worker         mtls->numTileM += (mm - 1) / tileSizeM;
132*e1eccf28SAndroid Build Coastguard Worker         mtls->tileSizeM = tileSizeM;
133*e1eccf28SAndroid Build Coastguard Worker     }
134*e1eccf28SAndroid Build Coastguard Worker     if (tileSizeN) {
135*e1eccf28SAndroid Build Coastguard Worker         mtls->numTileN += (nn - 1) / tileSizeN;
136*e1eccf28SAndroid Build Coastguard Worker         mtls->tileSizeN = tileSizeN;
137*e1eccf28SAndroid Build Coastguard Worker     }
138*e1eccf28SAndroid Build Coastguard Worker 
139*e1eccf28SAndroid Build Coastguard Worker     mtls->mSliceNum  = 0;
140*e1eccf28SAndroid Build Coastguard Worker }
141*e1eccf28SAndroid Build Coastguard Worker 
142*e1eccf28SAndroid Build Coastguard Worker // Generic GEMM callback routine.
143*e1eccf28SAndroid Build Coastguard Worker template <typename T_data, typename T_param, typename Func>
walk_tiled_gemm(Func blasFunc,T_param alpha,T_param beta,int vecSize,RsBlasCall * call,MTLaunchStructForEachBlas * mtls)144*e1eccf28SAndroid Build Coastguard Worker static void walk_tiled_gemm(Func blasFunc, T_param alpha, T_param beta, int vecSize,
145*e1eccf28SAndroid Build Coastguard Worker                             RsBlasCall* call, MTLaunchStructForEachBlas *mtls) {
146*e1eccf28SAndroid Build Coastguard Worker     // setup BLAS enum args
147*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
148*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
149*e1eccf28SAndroid Build Coastguard Worker 
150*e1eccf28SAndroid Build Coastguard Worker     void *A = nullptr;
151*e1eccf28SAndroid Build Coastguard Worker     void *B = nullptr;
152*e1eccf28SAndroid Build Coastguard Worker     void *C = nullptr;
153*e1eccf28SAndroid Build Coastguard Worker 
154*e1eccf28SAndroid Build Coastguard Worker     int lda = 0, ldb = 0, ldc = 0;
155*e1eccf28SAndroid Build Coastguard Worker 
156*e1eccf28SAndroid Build Coastguard Worker     const Allocation *ain[RS_KERNEL_INPUT_LIMIT];
157*e1eccf28SAndroid Build Coastguard Worker     ain[0] = mtls->ains[0];
158*e1eccf28SAndroid Build Coastguard Worker     ain[1] = mtls->ains[1];
159*e1eccf28SAndroid Build Coastguard Worker     ain[2] = mtls->ains[2];
160*e1eccf28SAndroid Build Coastguard Worker 
161*e1eccf28SAndroid Build Coastguard Worker     initABC(ain, sizeof(T_data) * vecSize, &A, &B, &C, &lda, &ldb, &ldc);
162*e1eccf28SAndroid Build Coastguard Worker 
163*e1eccf28SAndroid Build Coastguard Worker     // Determin the stride of the tiled matrices.
164*e1eccf28SAndroid Build Coastguard Worker     int mStride = (TransA == CblasNoTrans) ? lda : 1;
165*e1eccf28SAndroid Build Coastguard Worker     int nStride = (TransB == CblasNoTrans) ? 1 : ldb;
166*e1eccf28SAndroid Build Coastguard Worker     while (1) {
167*e1eccf28SAndroid Build Coastguard Worker         uint32_t slice  = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
168*e1eccf28SAndroid Build Coastguard Worker 
169*e1eccf28SAndroid Build Coastguard Worker         uint32_t mStart = (slice % mtls->numTileM) * mtls->tileSizeM;
170*e1eccf28SAndroid Build Coastguard Worker         uint32_t mEnd   = mStart + mtls->tileSizeM;
171*e1eccf28SAndroid Build Coastguard Worker         mEnd = rsMin(mEnd, (uint32_t)call->M);
172*e1eccf28SAndroid Build Coastguard Worker         if (mEnd <= mStart) {
173*e1eccf28SAndroid Build Coastguard Worker             return;
174*e1eccf28SAndroid Build Coastguard Worker         }
175*e1eccf28SAndroid Build Coastguard Worker 
176*e1eccf28SAndroid Build Coastguard Worker         uint32_t nStart = (slice / mtls->numTileM) * mtls->tileSizeN;
177*e1eccf28SAndroid Build Coastguard Worker         uint32_t nEnd   = nStart + mtls->tileSizeN;
178*e1eccf28SAndroid Build Coastguard Worker         nEnd = rsMin(nEnd, (uint32_t)call->N);
179*e1eccf28SAndroid Build Coastguard Worker         if (nEnd <= nStart) {
180*e1eccf28SAndroid Build Coastguard Worker             return;
181*e1eccf28SAndroid Build Coastguard Worker         }
182*e1eccf28SAndroid Build Coastguard Worker 
183*e1eccf28SAndroid Build Coastguard Worker         blasFunc(CblasRowMajor, TransA, TransB,
184*e1eccf28SAndroid Build Coastguard Worker                  mEnd - mStart, nEnd - nStart, call->K, alpha,
185*e1eccf28SAndroid Build Coastguard Worker                  (T_data *)A + mStart * mStride * vecSize, lda,
186*e1eccf28SAndroid Build Coastguard Worker                  (T_data *)B + nStart * nStride * vecSize, ldb, beta,
187*e1eccf28SAndroid Build Coastguard Worker                  (T_data *)C + (mStart * ldc + nStart) * vecSize, ldc);
188*e1eccf28SAndroid Build Coastguard Worker     }
189*e1eccf28SAndroid Build Coastguard Worker }
190*e1eccf28SAndroid Build Coastguard Worker 
191*e1eccf28SAndroid Build Coastguard Worker // SGEMM callback
walk_2d_sgemm(void * usr,uint32_t idx)192*e1eccf28SAndroid Build Coastguard Worker static void walk_2d_sgemm(void *usr, uint32_t idx) {
193*e1eccf28SAndroid Build Coastguard Worker     MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
194*e1eccf28SAndroid Build Coastguard Worker     RsBlasCall* call = (RsBlasCall*) mtls->sc;
195*e1eccf28SAndroid Build Coastguard Worker 
196*e1eccf28SAndroid Build Coastguard Worker     float alpha = call->alpha.f;
197*e1eccf28SAndroid Build Coastguard Worker     float beta = call->beta.f;
198*e1eccf28SAndroid Build Coastguard Worker 
199*e1eccf28SAndroid Build Coastguard Worker     walk_tiled_gemm<float, float, FnPtr_cblas_sgemm>(cblas_sgemm, alpha, beta, 1, call, mtls);
200*e1eccf28SAndroid Build Coastguard Worker }
201*e1eccf28SAndroid Build Coastguard Worker 
202*e1eccf28SAndroid Build Coastguard Worker // DGEMM callback
walk_2d_dgemm(void * usr,uint32_t idx)203*e1eccf28SAndroid Build Coastguard Worker static void walk_2d_dgemm(void *usr, uint32_t idx) {
204*e1eccf28SAndroid Build Coastguard Worker     MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
205*e1eccf28SAndroid Build Coastguard Worker     RsBlasCall* call = (RsBlasCall*) mtls->sc;
206*e1eccf28SAndroid Build Coastguard Worker 
207*e1eccf28SAndroid Build Coastguard Worker     double alpha = call->alpha.d;
208*e1eccf28SAndroid Build Coastguard Worker     double beta = call->beta.d;
209*e1eccf28SAndroid Build Coastguard Worker 
210*e1eccf28SAndroid Build Coastguard Worker     walk_tiled_gemm<double, double, FnPtr_cblas_dgemm>(cblas_dgemm, alpha, beta, 1, call, mtls);
211*e1eccf28SAndroid Build Coastguard Worker }
212*e1eccf28SAndroid Build Coastguard Worker 
213*e1eccf28SAndroid Build Coastguard Worker // CGEMM callback
walk_2d_cgemm(void * usr,uint32_t idx)214*e1eccf28SAndroid Build Coastguard Worker static void walk_2d_cgemm(void *usr, uint32_t idx) {
215*e1eccf28SAndroid Build Coastguard Worker     MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
216*e1eccf28SAndroid Build Coastguard Worker     RsBlasCall* call = (RsBlasCall*) mtls->sc;
217*e1eccf28SAndroid Build Coastguard Worker 
218*e1eccf28SAndroid Build Coastguard Worker     void * alpha = (void *)&call->alpha.c;
219*e1eccf28SAndroid Build Coastguard Worker     void * beta = (void *)&call->beta.c;
220*e1eccf28SAndroid Build Coastguard Worker 
221*e1eccf28SAndroid Build Coastguard Worker     walk_tiled_gemm<float, void *, FnPtr_cblas_cgemm>(cblas_cgemm, alpha, beta, 2, call, mtls);
222*e1eccf28SAndroid Build Coastguard Worker }
223*e1eccf28SAndroid Build Coastguard Worker 
224*e1eccf28SAndroid Build Coastguard Worker // ZGEMM callback
walk_2d_zgemm(void * usr,uint32_t idx)225*e1eccf28SAndroid Build Coastguard Worker static void walk_2d_zgemm(void *usr, uint32_t idx) {
226*e1eccf28SAndroid Build Coastguard Worker     MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr;
227*e1eccf28SAndroid Build Coastguard Worker     RsBlasCall* call = (RsBlasCall*) mtls->sc;
228*e1eccf28SAndroid Build Coastguard Worker 
229*e1eccf28SAndroid Build Coastguard Worker     void * alpha = (void *)&call->alpha.z;
230*e1eccf28SAndroid Build Coastguard Worker     void * beta = (void *)&call->beta.z;
231*e1eccf28SAndroid Build Coastguard Worker 
232*e1eccf28SAndroid Build Coastguard Worker     walk_tiled_gemm<double, void *, FnPtr_cblas_zgemm>(cblas_zgemm, alpha, beta, 2, call, mtls);
233*e1eccf28SAndroid Build Coastguard Worker }
234*e1eccf28SAndroid Build Coastguard Worker 
235*e1eccf28SAndroid Build Coastguard Worker 
invokeForEach(uint32_t slot,const Allocation ** ain,uint32_t inLen,Allocation * aout,const void * usr,uint32_t usrLen,const RsScriptCall * sc)236*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot,
237*e1eccf28SAndroid Build Coastguard Worker                                               const Allocation ** ain,
238*e1eccf28SAndroid Build Coastguard Worker                                               uint32_t inLen,
239*e1eccf28SAndroid Build Coastguard Worker                                               Allocation * aout,
240*e1eccf28SAndroid Build Coastguard Worker                                               const void * usr,
241*e1eccf28SAndroid Build Coastguard Worker                                               uint32_t usrLen,
242*e1eccf28SAndroid Build Coastguard Worker                                               const RsScriptCall *sc) {
243*e1eccf28SAndroid Build Coastguard Worker     RsBlasCall* call = (RsBlasCall*) usr;
244*e1eccf28SAndroid Build Coastguard Worker     // setup BLAS enum args
245*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
246*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
247*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo;
248*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag;
249*e1eccf28SAndroid Build Coastguard Worker     enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side;
250*e1eccf28SAndroid Build Coastguard Worker 
251*e1eccf28SAndroid Build Coastguard Worker     void *A = nullptr;
252*e1eccf28SAndroid Build Coastguard Worker     void *B = nullptr;
253*e1eccf28SAndroid Build Coastguard Worker     void *C = nullptr;
254*e1eccf28SAndroid Build Coastguard Worker     void *X = nullptr;
255*e1eccf28SAndroid Build Coastguard Worker     void *Y = nullptr;
256*e1eccf28SAndroid Build Coastguard Worker 
257*e1eccf28SAndroid Build Coastguard Worker     int lda = 0, ldb = 0, ldc = 0;
258*e1eccf28SAndroid Build Coastguard Worker 
259*e1eccf28SAndroid Build Coastguard Worker     MTLaunchStructForEachBlas mtls;
260*e1eccf28SAndroid Build Coastguard Worker 
261*e1eccf28SAndroid Build Coastguard Worker #ifdef RS_COMPATIBILITY_LIB
262*e1eccf28SAndroid Build Coastguard Worker     // Allow BNNM even without libblas
263*e1eccf28SAndroid Build Coastguard Worker     if (call->func != RsBlas_bnnm && !isBlasLibInitialized) {
264*e1eccf28SAndroid Build Coastguard Worker         if (!loadBLASLib()) {
265*e1eccf28SAndroid Build Coastguard Worker             ALOGE("Failed to load the BLAS lib, IntrinsicBLAS NOT supported!\n");
266*e1eccf28SAndroid Build Coastguard Worker             return;
267*e1eccf28SAndroid Build Coastguard Worker         }
268*e1eccf28SAndroid Build Coastguard Worker         isBlasLibInitialized = true;
269*e1eccf28SAndroid Build Coastguard Worker     }
270*e1eccf28SAndroid Build Coastguard Worker #endif
271*e1eccf28SAndroid Build Coastguard Worker 
272*e1eccf28SAndroid Build Coastguard Worker     switch (call->func) {
273*e1eccf28SAndroid Build Coastguard Worker 
274*e1eccf28SAndroid Build Coastguard Worker     // Level 1 BLAS: returns into a 1D Allocation
275*e1eccf28SAndroid Build Coastguard Worker 
276*e1eccf28SAndroid Build Coastguard Worker 
277*e1eccf28SAndroid Build Coastguard Worker     // Level 2 BLAS
278*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sgemv):
279*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
280*e1eccf28SAndroid Build Coastguard Worker         cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A,
281*e1eccf28SAndroid Build Coastguard Worker                     lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
282*e1eccf28SAndroid Build Coastguard Worker         break;
283*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sgbmv):
284*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
285*e1eccf28SAndroid Build Coastguard Worker         cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
286*e1eccf28SAndroid Build Coastguard Worker                     call->alpha.f, (float*)A, lda, (float*)X, call->incX,
287*e1eccf28SAndroid Build Coastguard Worker                     call->beta.f, (float*)Y, call->incY);
288*e1eccf28SAndroid Build Coastguard Worker         break;
289*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_strmv):
290*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
291*e1eccf28SAndroid Build Coastguard Worker         cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
292*e1eccf28SAndroid Build Coastguard Worker                     lda, (float*)X, call->incX);
293*e1eccf28SAndroid Build Coastguard Worker         break;
294*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_stbmv):
295*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
296*e1eccf28SAndroid Build Coastguard Worker         cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
297*e1eccf28SAndroid Build Coastguard Worker                     lda, (float*)X, call->incX);
298*e1eccf28SAndroid Build Coastguard Worker         break;
299*e1eccf28SAndroid Build Coastguard Worker     // stpmv takes a packed 1D Allocation only
300*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_stpmv):
301*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
302*e1eccf28SAndroid Build Coastguard Worker         cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
303*e1eccf28SAndroid Build Coastguard Worker                     (float*)X, call->incX);
304*e1eccf28SAndroid Build Coastguard Worker         break;
305*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_strsv):
306*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
307*e1eccf28SAndroid Build Coastguard Worker         cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda,
308*e1eccf28SAndroid Build Coastguard Worker                     (float*)X, call->incX);
309*e1eccf28SAndroid Build Coastguard Worker         break;
310*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_stbsv):
311*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
312*e1eccf28SAndroid Build Coastguard Worker         cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
313*e1eccf28SAndroid Build Coastguard Worker                     lda, (float*)X, call->incX);
314*e1eccf28SAndroid Build Coastguard Worker         break;
315*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_stpsv):
316*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
317*e1eccf28SAndroid Build Coastguard Worker         cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
318*e1eccf28SAndroid Build Coastguard Worker                     (float*)X, call->incX);
319*e1eccf28SAndroid Build Coastguard Worker         break;
320*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dgemv):
321*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
322*e1eccf28SAndroid Build Coastguard Worker         cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A,
323*e1eccf28SAndroid Build Coastguard Worker                     lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
324*e1eccf28SAndroid Build Coastguard Worker         break;
325*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dgbmv):
326*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
327*e1eccf28SAndroid Build Coastguard Worker         cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
328*e1eccf28SAndroid Build Coastguard Worker                     call->alpha.d, (double*)A, lda, (double*)X, call->incX,
329*e1eccf28SAndroid Build Coastguard Worker                     call->beta.d, (double*)Y, call->incY);
330*e1eccf28SAndroid Build Coastguard Worker         break;
331*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtrmv):
332*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
333*e1eccf28SAndroid Build Coastguard Worker         cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
334*e1eccf28SAndroid Build Coastguard Worker                     lda, (double*)X, call->incX);
335*e1eccf28SAndroid Build Coastguard Worker         break;
336*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtbmv):
337*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
338*e1eccf28SAndroid Build Coastguard Worker         cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
339*e1eccf28SAndroid Build Coastguard Worker                     lda, (double*)X, call->incX);
340*e1eccf28SAndroid Build Coastguard Worker         break;
341*e1eccf28SAndroid Build Coastguard Worker     // stpmv takes a packed 1D Allocation only
342*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtpmv):
343*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
344*e1eccf28SAndroid Build Coastguard Worker         cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
345*e1eccf28SAndroid Build Coastguard Worker                     (double*)X, call->incX);
346*e1eccf28SAndroid Build Coastguard Worker         break;
347*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtrsv):
348*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
349*e1eccf28SAndroid Build Coastguard Worker         cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda,
350*e1eccf28SAndroid Build Coastguard Worker                     (double*)X, call->incX);
351*e1eccf28SAndroid Build Coastguard Worker         break;
352*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtbsv):
353*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
354*e1eccf28SAndroid Build Coastguard Worker         cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
355*e1eccf28SAndroid Build Coastguard Worker                     lda, (double*)X, call->incX);
356*e1eccf28SAndroid Build Coastguard Worker         break;
357*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtpsv):
358*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
359*e1eccf28SAndroid Build Coastguard Worker         cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
360*e1eccf28SAndroid Build Coastguard Worker                     (double*)X, call->incX);
361*e1eccf28SAndroid Build Coastguard Worker         break;
362*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cgemv):
363*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
364*e1eccf28SAndroid Build Coastguard Worker         cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A,
365*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY);
366*e1eccf28SAndroid Build Coastguard Worker         break;
367*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cgbmv):
368*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
369*e1eccf28SAndroid Build Coastguard Worker         cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
370*e1eccf28SAndroid Build Coastguard Worker                     (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX,
371*e1eccf28SAndroid Build Coastguard Worker                     (void*)&call->beta.c, (void*)Y, call->incY);
372*e1eccf28SAndroid Build Coastguard Worker         break;
373*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctrmv):
374*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
375*e1eccf28SAndroid Build Coastguard Worker         cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
376*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX);
377*e1eccf28SAndroid Build Coastguard Worker         break;
378*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctbmv):
379*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
380*e1eccf28SAndroid Build Coastguard Worker         cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
381*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX);
382*e1eccf28SAndroid Build Coastguard Worker         break;
383*e1eccf28SAndroid Build Coastguard Worker     // ctpmv takes a packed 1D Allocation only
384*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctpmv):
385*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
386*e1eccf28SAndroid Build Coastguard Worker         cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
387*e1eccf28SAndroid Build Coastguard Worker                     (void*)X, call->incX);
388*e1eccf28SAndroid Build Coastguard Worker         break;
389*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctrsv):
390*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
391*e1eccf28SAndroid Build Coastguard Worker         cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
392*e1eccf28SAndroid Build Coastguard Worker                     (void*)X, call->incX);
393*e1eccf28SAndroid Build Coastguard Worker         break;
394*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctbsv):
395*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
396*e1eccf28SAndroid Build Coastguard Worker         cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
397*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX);
398*e1eccf28SAndroid Build Coastguard Worker         break;
399*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctpsv):
400*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
401*e1eccf28SAndroid Build Coastguard Worker         cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
402*e1eccf28SAndroid Build Coastguard Worker                     (void*)X, call->incX);
403*e1eccf28SAndroid Build Coastguard Worker         break;
404*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zgemv):
405*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
406*e1eccf28SAndroid Build Coastguard Worker         cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A,
407*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY);
408*e1eccf28SAndroid Build Coastguard Worker         break;
409*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zgbmv):
410*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
411*e1eccf28SAndroid Build Coastguard Worker         cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
412*e1eccf28SAndroid Build Coastguard Worker                     (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX,
413*e1eccf28SAndroid Build Coastguard Worker                     (void*)&call->beta.z, (void*)Y, call->incY);
414*e1eccf28SAndroid Build Coastguard Worker         break;
415*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztrmv):
416*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
417*e1eccf28SAndroid Build Coastguard Worker         cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
418*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX);
419*e1eccf28SAndroid Build Coastguard Worker         break;
420*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztbmv):
421*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
422*e1eccf28SAndroid Build Coastguard Worker         cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
423*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX);
424*e1eccf28SAndroid Build Coastguard Worker         break;
425*e1eccf28SAndroid Build Coastguard Worker     // ztpmv takes a packed 1D Allocation only
426*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztpmv):
427*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
428*e1eccf28SAndroid Build Coastguard Worker         cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
429*e1eccf28SAndroid Build Coastguard Worker                     (void*)X, call->incX);
430*e1eccf28SAndroid Build Coastguard Worker         break;
431*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztrsv):
432*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
433*e1eccf28SAndroid Build Coastguard Worker         cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
434*e1eccf28SAndroid Build Coastguard Worker                     (void*)X, call->incX);
435*e1eccf28SAndroid Build Coastguard Worker         break;
436*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztbsv):
437*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
438*e1eccf28SAndroid Build Coastguard Worker         cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
439*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)X, call->incX);
440*e1eccf28SAndroid Build Coastguard Worker         break;
441*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztpsv):
442*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
443*e1eccf28SAndroid Build Coastguard Worker         cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
444*e1eccf28SAndroid Build Coastguard Worker                     (void*)X, call->incX);
445*e1eccf28SAndroid Build Coastguard Worker         break;
446*e1eccf28SAndroid Build Coastguard Worker 
447*e1eccf28SAndroid Build Coastguard Worker 
448*e1eccf28SAndroid Build Coastguard Worker     // S and D only
449*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssymv):
450*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
451*e1eccf28SAndroid Build Coastguard Worker         cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda,
452*e1eccf28SAndroid Build Coastguard Worker                     (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
453*e1eccf28SAndroid Build Coastguard Worker         break;
454*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssbmv):
455*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
456*e1eccf28SAndroid Build Coastguard Worker         cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f,
457*e1eccf28SAndroid Build Coastguard Worker                     (float*)A, lda, (float*)X, call->incX, call->beta.f,
458*e1eccf28SAndroid Build Coastguard Worker                     (float*)Y, call->incY);
459*e1eccf28SAndroid Build Coastguard Worker         break;
460*e1eccf28SAndroid Build Coastguard Worker     //sspmv requires a packed 1D Allocation
461*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sspmv):
462*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
463*e1eccf28SAndroid Build Coastguard Worker         cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A,
464*e1eccf28SAndroid Build Coastguard Worker                     (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
465*e1eccf28SAndroid Build Coastguard Worker         break;
466*e1eccf28SAndroid Build Coastguard Worker     // following calls have init reordered because A is output matrix
467*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sger):
468*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
469*e1eccf28SAndroid Build Coastguard Worker         cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X,
470*e1eccf28SAndroid Build Coastguard Worker                    call->incX, (float*)Y, call->incY, (float*)A, lda);
471*e1eccf28SAndroid Build Coastguard Worker         break;
472*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssyr):
473*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
474*e1eccf28SAndroid Build Coastguard Worker         cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
475*e1eccf28SAndroid Build Coastguard Worker                    (float*)A, lda);
476*e1eccf28SAndroid Build Coastguard Worker         break;
477*e1eccf28SAndroid Build Coastguard Worker     // sspr is packed 1D Allocation A only
478*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sspr):
479*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
480*e1eccf28SAndroid Build Coastguard Worker         cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
481*e1eccf28SAndroid Build Coastguard Worker                    (float*)A);
482*e1eccf28SAndroid Build Coastguard Worker         break;
483*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssyr2):
484*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
485*e1eccf28SAndroid Build Coastguard Worker         cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
486*e1eccf28SAndroid Build Coastguard Worker                     (float*)Y, call->incY, (float*)A, lda);
487*e1eccf28SAndroid Build Coastguard Worker         break;
488*e1eccf28SAndroid Build Coastguard Worker     // sspr2 is packed 1D Allocation A only
489*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sspr2):
490*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
491*e1eccf28SAndroid Build Coastguard Worker         cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
492*e1eccf28SAndroid Build Coastguard Worker                     (float*)Y, call->incY, (float*)A);
493*e1eccf28SAndroid Build Coastguard Worker         break;
494*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsymv):
495*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
496*e1eccf28SAndroid Build Coastguard Worker         cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda,
497*e1eccf28SAndroid Build Coastguard Worker                     (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
498*e1eccf28SAndroid Build Coastguard Worker         break;
499*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsbmv):
500*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
501*e1eccf28SAndroid Build Coastguard Worker         cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d,
502*e1eccf28SAndroid Build Coastguard Worker                     (double*)A, lda, (double*)X, call->incX, call->beta.d,
503*e1eccf28SAndroid Build Coastguard Worker                     (double*)Y, call->incY);
504*e1eccf28SAndroid Build Coastguard Worker         break;
505*e1eccf28SAndroid Build Coastguard Worker     // dspmv requires a packed 1D Allocation
506*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dspmv):
507*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
508*e1eccf28SAndroid Build Coastguard Worker         cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A,
509*e1eccf28SAndroid Build Coastguard Worker                     (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
510*e1eccf28SAndroid Build Coastguard Worker         break;
511*e1eccf28SAndroid Build Coastguard Worker     // following calls have init reordered because A is output matrix
512*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dger):
513*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
514*e1eccf28SAndroid Build Coastguard Worker         cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X,
515*e1eccf28SAndroid Build Coastguard Worker                    call->incX, (double*)Y, call->incY, (double*)A, lda);
516*e1eccf28SAndroid Build Coastguard Worker         break;
517*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsyr):
518*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
519*e1eccf28SAndroid Build Coastguard Worker         cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
520*e1eccf28SAndroid Build Coastguard Worker                    (double*)A, lda);
521*e1eccf28SAndroid Build Coastguard Worker         break;
522*e1eccf28SAndroid Build Coastguard Worker     // dspr is packed 1D Allocation A only
523*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dspr):
524*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
525*e1eccf28SAndroid Build Coastguard Worker         cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
526*e1eccf28SAndroid Build Coastguard Worker                    (double*)A);
527*e1eccf28SAndroid Build Coastguard Worker         break;
528*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsyr2):
529*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
530*e1eccf28SAndroid Build Coastguard Worker         cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
531*e1eccf28SAndroid Build Coastguard Worker                     (double*)Y, call->incY, (double*)A, lda);
532*e1eccf28SAndroid Build Coastguard Worker         break;
533*e1eccf28SAndroid Build Coastguard Worker     // dspr2 is packed 1D Allocation A only
534*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dspr2):
535*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
536*e1eccf28SAndroid Build Coastguard Worker         cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
537*e1eccf28SAndroid Build Coastguard Worker                     (double*)Y, call->incY, (double*)A);
538*e1eccf28SAndroid Build Coastguard Worker         break;
539*e1eccf28SAndroid Build Coastguard Worker 
540*e1eccf28SAndroid Build Coastguard Worker     // C and Z only
541*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_chemv):
542*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
543*e1eccf28SAndroid Build Coastguard Worker         cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda,
544*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, (void*)&call->beta.c, Y, call->incY);
545*e1eccf28SAndroid Build Coastguard Worker         break;
546*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_chbmv):
547*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
548*e1eccf28SAndroid Build Coastguard Worker         cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c,
549*e1eccf28SAndroid Build Coastguard Worker                     A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY);
550*e1eccf28SAndroid Build Coastguard Worker         break;
551*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_chpmv):
552*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
553*e1eccf28SAndroid Build Coastguard Worker         cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A,
554*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, (void*)&call->beta.c, Y, call->incY);
555*e1eccf28SAndroid Build Coastguard Worker         break;
556*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cgeru):
557*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
558*e1eccf28SAndroid Build Coastguard Worker         cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
559*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, Y, call->incY, A, lda);
560*e1eccf28SAndroid Build Coastguard Worker         break;
561*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cgerc):
562*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
563*e1eccf28SAndroid Build Coastguard Worker         cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
564*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, Y, call->incY, A, lda);
565*e1eccf28SAndroid Build Coastguard Worker         break;
566*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cher):
567*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
568*e1eccf28SAndroid Build Coastguard Worker         cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f,
569*e1eccf28SAndroid Build Coastguard Worker                    X, call->incX, A, lda);
570*e1eccf28SAndroid Build Coastguard Worker         break;
571*e1eccf28SAndroid Build Coastguard Worker     // packed 1D Allocations only
572*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_chpr):
573*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
574*e1eccf28SAndroid Build Coastguard Worker         cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X,
575*e1eccf28SAndroid Build Coastguard Worker                    call->incX, A);
576*e1eccf28SAndroid Build Coastguard Worker         break;
577*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cher2):
578*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
579*e1eccf28SAndroid Build Coastguard Worker         cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c,
580*e1eccf28SAndroid Build Coastguard Worker                    X, call->incX, Y, call->incY, A, lda);
581*e1eccf28SAndroid Build Coastguard Worker         break;
582*e1eccf28SAndroid Build Coastguard Worker     // packed 1D Allocations only
583*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_chpr2):
584*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
585*e1eccf28SAndroid Build Coastguard Worker         cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X,
586*e1eccf28SAndroid Build Coastguard Worker                    call->incX, Y, call->incY, A);
587*e1eccf28SAndroid Build Coastguard Worker         break;
588*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zhemv):
589*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
590*e1eccf28SAndroid Build Coastguard Worker         cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda,
591*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, (void*)&call->beta.z, Y, call->incY);
592*e1eccf28SAndroid Build Coastguard Worker         break;
593*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zhbmv):
594*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
595*e1eccf28SAndroid Build Coastguard Worker         cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z,
596*e1eccf28SAndroid Build Coastguard Worker                     A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY);
597*e1eccf28SAndroid Build Coastguard Worker         break;
598*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zhpmv):
599*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
600*e1eccf28SAndroid Build Coastguard Worker         cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A,
601*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, (void*)&call->beta.z, Y, call->incY);
602*e1eccf28SAndroid Build Coastguard Worker         break;
603*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zgeru):
604*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
605*e1eccf28SAndroid Build Coastguard Worker         cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
606*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, Y, call->incY, A, lda);
607*e1eccf28SAndroid Build Coastguard Worker         break;
608*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zgerc):
609*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
610*e1eccf28SAndroid Build Coastguard Worker         cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
611*e1eccf28SAndroid Build Coastguard Worker                     X, call->incX, Y, call->incY, A, lda);
612*e1eccf28SAndroid Build Coastguard Worker         break;
613*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zher):
614*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
615*e1eccf28SAndroid Build Coastguard Worker         cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d,
616*e1eccf28SAndroid Build Coastguard Worker                    X, call->incX, A, lda);
617*e1eccf28SAndroid Build Coastguard Worker         break;
618*e1eccf28SAndroid Build Coastguard Worker     // packed 1D Allocations only
619*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zhpr):
620*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &X, nullptr, &A, &ldb, nullptr, &lda);
621*e1eccf28SAndroid Build Coastguard Worker         cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X,
622*e1eccf28SAndroid Build Coastguard Worker                    call->incX, A);
623*e1eccf28SAndroid Build Coastguard Worker         break;
624*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zher2):
625*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
626*e1eccf28SAndroid Build Coastguard Worker         cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z,
627*e1eccf28SAndroid Build Coastguard Worker                    X, call->incX, Y, call->incY, A, lda);
628*e1eccf28SAndroid Build Coastguard Worker         break;
629*e1eccf28SAndroid Build Coastguard Worker     // packed 1D Allocations only
630*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zhpr2):
631*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
632*e1eccf28SAndroid Build Coastguard Worker         cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X,
633*e1eccf28SAndroid Build Coastguard Worker                    call->incX, Y, call->incY, A);
634*e1eccf28SAndroid Build Coastguard Worker         break;
635*e1eccf28SAndroid Build Coastguard Worker 
636*e1eccf28SAndroid Build Coastguard Worker     // Level 3 BLAS
637*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_sgemm):
638*e1eccf28SAndroid Build Coastguard Worker         setupGEMM(&mtls, ain, call, mCtx);
639*e1eccf28SAndroid Build Coastguard Worker         if (mtls.isThreadable) {
640*e1eccf28SAndroid Build Coastguard Worker             mCtx->launchThreads(walk_2d_sgemm, &mtls);
641*e1eccf28SAndroid Build Coastguard Worker         } else {
642*e1eccf28SAndroid Build Coastguard Worker             initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
643*e1eccf28SAndroid Build Coastguard Worker             cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f,
644*e1eccf28SAndroid Build Coastguard Worker                         (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
645*e1eccf28SAndroid Build Coastguard Worker         }
646*e1eccf28SAndroid Build Coastguard Worker         break;
647*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssymm):
648*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
649*e1eccf28SAndroid Build Coastguard Worker         cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A,
650*e1eccf28SAndroid Build Coastguard Worker                     lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
651*e1eccf28SAndroid Build Coastguard Worker         break;
652*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssyrk):
653*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc);
654*e1eccf28SAndroid Build Coastguard Worker         cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
655*e1eccf28SAndroid Build Coastguard Worker                     lda, call->beta.f, (float*)C, ldc);
656*e1eccf28SAndroid Build Coastguard Worker         break;
657*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ssyr2k):
658*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
659*e1eccf28SAndroid Build Coastguard Worker         cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
660*e1eccf28SAndroid Build Coastguard Worker                      lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
661*e1eccf28SAndroid Build Coastguard Worker         break;
662*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_strmm):
663*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
664*e1eccf28SAndroid Build Coastguard Worker         cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
665*e1eccf28SAndroid Build Coastguard Worker                     (float*)A, lda, (float*)B, ldb);
666*e1eccf28SAndroid Build Coastguard Worker         break;
667*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_strsm):
668*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
669*e1eccf28SAndroid Build Coastguard Worker         cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
670*e1eccf28SAndroid Build Coastguard Worker                     (float*)A, lda, (float*)B, ldb);
671*e1eccf28SAndroid Build Coastguard Worker         break;
672*e1eccf28SAndroid Build Coastguard Worker 
673*e1eccf28SAndroid Build Coastguard Worker 
674*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dgemm):
675*e1eccf28SAndroid Build Coastguard Worker         setupGEMM(&mtls, ain, call, mCtx);
676*e1eccf28SAndroid Build Coastguard Worker         if (mtls.isThreadable) {
677*e1eccf28SAndroid Build Coastguard Worker             mCtx->launchThreads(walk_2d_dgemm, &mtls);
678*e1eccf28SAndroid Build Coastguard Worker         } else {
679*e1eccf28SAndroid Build Coastguard Worker             initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
680*e1eccf28SAndroid Build Coastguard Worker             cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d,
681*e1eccf28SAndroid Build Coastguard Worker                         (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
682*e1eccf28SAndroid Build Coastguard Worker         }
683*e1eccf28SAndroid Build Coastguard Worker         break;
684*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsymm):
685*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
686*e1eccf28SAndroid Build Coastguard Worker         cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A,
687*e1eccf28SAndroid Build Coastguard Worker                     lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
688*e1eccf28SAndroid Build Coastguard Worker         break;
689*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsyrk):
690*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc);
691*e1eccf28SAndroid Build Coastguard Worker         cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
692*e1eccf28SAndroid Build Coastguard Worker                     lda, call->beta.d, (double*)C, ldc);
693*e1eccf28SAndroid Build Coastguard Worker         break;
694*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dsyr2k):
695*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
696*e1eccf28SAndroid Build Coastguard Worker         cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
697*e1eccf28SAndroid Build Coastguard Worker                      lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
698*e1eccf28SAndroid Build Coastguard Worker         break;
699*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtrmm):
700*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
701*e1eccf28SAndroid Build Coastguard Worker         cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
702*e1eccf28SAndroid Build Coastguard Worker                     (double*)A, lda, (double*)B, ldb);
703*e1eccf28SAndroid Build Coastguard Worker         break;
704*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_dtrsm):
705*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
706*e1eccf28SAndroid Build Coastguard Worker         cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
707*e1eccf28SAndroid Build Coastguard Worker                     (double*)A, lda, (double*)B, ldb);
708*e1eccf28SAndroid Build Coastguard Worker         break;
709*e1eccf28SAndroid Build Coastguard Worker 
710*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cgemm):
711*e1eccf28SAndroid Build Coastguard Worker         setupGEMM(&mtls, ain, call, mCtx);
712*e1eccf28SAndroid Build Coastguard Worker         if (mtls.isThreadable) {
713*e1eccf28SAndroid Build Coastguard Worker             mCtx->launchThreads(walk_2d_cgemm, &mtls);
714*e1eccf28SAndroid Build Coastguard Worker         } else {
715*e1eccf28SAndroid Build Coastguard Worker             initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
716*e1eccf28SAndroid Build Coastguard Worker             cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c,
717*e1eccf28SAndroid Build Coastguard Worker                         A, lda, B, ldb, (void*)&call->beta.c, C, ldc);
718*e1eccf28SAndroid Build Coastguard Worker         }
719*e1eccf28SAndroid Build Coastguard Worker         break;
720*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_csymm):
721*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
722*e1eccf28SAndroid Build Coastguard Worker         cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A,
723*e1eccf28SAndroid Build Coastguard Worker                     lda, B, ldb, (void*)&call->beta.c, C, ldc);
724*e1eccf28SAndroid Build Coastguard Worker         break;
725*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_csyrk):
726*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
727*e1eccf28SAndroid Build Coastguard Worker         cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
728*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)&call->beta.c, C, ldc);
729*e1eccf28SAndroid Build Coastguard Worker         break;
730*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_csyr2k):
731*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
732*e1eccf28SAndroid Build Coastguard Worker         cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
733*e1eccf28SAndroid Build Coastguard Worker                      lda, B, ldb, (void*)&call->beta.c, C, ldc);
734*e1eccf28SAndroid Build Coastguard Worker         break;
735*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctrmm):
736*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
737*e1eccf28SAndroid Build Coastguard Worker         cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
738*e1eccf28SAndroid Build Coastguard Worker                     A, lda, B, ldb);
739*e1eccf28SAndroid Build Coastguard Worker         break;
740*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ctrsm):
741*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
742*e1eccf28SAndroid Build Coastguard Worker         cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
743*e1eccf28SAndroid Build Coastguard Worker                     A, lda, B, ldb);
744*e1eccf28SAndroid Build Coastguard Worker         break;
745*e1eccf28SAndroid Build Coastguard Worker 
746*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zgemm):
747*e1eccf28SAndroid Build Coastguard Worker         setupGEMM(&mtls, ain, call, mCtx);
748*e1eccf28SAndroid Build Coastguard Worker         if (mtls.isThreadable) {
749*e1eccf28SAndroid Build Coastguard Worker             mCtx->launchThreads(walk_2d_zgemm, &mtls);
750*e1eccf28SAndroid Build Coastguard Worker         } else {
751*e1eccf28SAndroid Build Coastguard Worker             initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
752*e1eccf28SAndroid Build Coastguard Worker             cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z,
753*e1eccf28SAndroid Build Coastguard Worker                         A, lda, B, ldb, (void*)&call->beta.z, C, ldc);
754*e1eccf28SAndroid Build Coastguard Worker         }
755*e1eccf28SAndroid Build Coastguard Worker         break;
756*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zsymm):
757*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
758*e1eccf28SAndroid Build Coastguard Worker         cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A,
759*e1eccf28SAndroid Build Coastguard Worker                     lda, B, ldb, (void*)&call->beta.z, C, ldc);
760*e1eccf28SAndroid Build Coastguard Worker         break;
761*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zsyrk):
762*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
763*e1eccf28SAndroid Build Coastguard Worker         cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
764*e1eccf28SAndroid Build Coastguard Worker                     lda, (void*)&call->beta.z, C, ldc);
765*e1eccf28SAndroid Build Coastguard Worker         break;
766*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zsyr2k):
767*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
768*e1eccf28SAndroid Build Coastguard Worker         cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
769*e1eccf28SAndroid Build Coastguard Worker                      lda, B, ldb, (void*)&call->beta.z, C, ldc);
770*e1eccf28SAndroid Build Coastguard Worker         break;
771*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztrmm):
772*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
773*e1eccf28SAndroid Build Coastguard Worker         cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
774*e1eccf28SAndroid Build Coastguard Worker                     A, lda, B, ldb);
775*e1eccf28SAndroid Build Coastguard Worker         break;
776*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_ztrsm):
777*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
778*e1eccf28SAndroid Build Coastguard Worker         cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
779*e1eccf28SAndroid Build Coastguard Worker                     A, lda, B, ldb);
780*e1eccf28SAndroid Build Coastguard Worker         break;
781*e1eccf28SAndroid Build Coastguard Worker 
782*e1eccf28SAndroid Build Coastguard Worker     // Level 3 C and Z only
783*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_chemm):
784*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
785*e1eccf28SAndroid Build Coastguard Worker         cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda,
786*e1eccf28SAndroid Build Coastguard Worker                     B, ldb, (void*)&call->beta.c, C, ldc);
787*e1eccf28SAndroid Build Coastguard Worker         break;
788*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cherk):
789*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
790*e1eccf28SAndroid Build Coastguard Worker         cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda,
791*e1eccf28SAndroid Build Coastguard Worker                     call->beta.f, C, ldc);
792*e1eccf28SAndroid Build Coastguard Worker         break;
793*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_cher2k):
794*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
795*e1eccf28SAndroid Build Coastguard Worker         cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda,
796*e1eccf28SAndroid Build Coastguard Worker                      B, ldb, call->beta.f, C, ldc);
797*e1eccf28SAndroid Build Coastguard Worker         break;
798*e1eccf28SAndroid Build Coastguard Worker 
799*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zhemm):
800*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
801*e1eccf28SAndroid Build Coastguard Worker         cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda,
802*e1eccf28SAndroid Build Coastguard Worker                     B, ldb, (void*)&call->beta.z, C, ldc);
803*e1eccf28SAndroid Build Coastguard Worker         break;
804*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zherk):
805*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
806*e1eccf28SAndroid Build Coastguard Worker         cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda,
807*e1eccf28SAndroid Build Coastguard Worker                     call->beta.d, C, ldc);
808*e1eccf28SAndroid Build Coastguard Worker         break;
809*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_zher2k):
810*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
811*e1eccf28SAndroid Build Coastguard Worker         cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda,
812*e1eccf28SAndroid Build Coastguard Worker                      B, ldb, call->beta.d, C, ldc);
813*e1eccf28SAndroid Build Coastguard Worker         break;
814*e1eccf28SAndroid Build Coastguard Worker 
815*e1eccf28SAndroid Build Coastguard Worker 
816*e1eccf28SAndroid Build Coastguard Worker     case (RsBlas_bnnm):
817*e1eccf28SAndroid Build Coastguard Worker         initABC(ain, sizeof(uint8_t), &A, &B, &C, &lda, &ldb, &ldc);
818*e1eccf28SAndroid Build Coastguard Worker         kernelBNNM(call->M, call->N, call->K,
819*e1eccf28SAndroid Build Coastguard Worker                     (const uint8_t*)A, call->a_offset, lda,
820*e1eccf28SAndroid Build Coastguard Worker                     (const uint8_t*)B, call->b_offset, ldb,
821*e1eccf28SAndroid Build Coastguard Worker                     (uint8_t*)C, call->c_offset, ldc,
822*e1eccf28SAndroid Build Coastguard Worker                     call->c_mult_int);
823*e1eccf28SAndroid Build Coastguard Worker 
824*e1eccf28SAndroid Build Coastguard Worker         break;
825*e1eccf28SAndroid Build Coastguard Worker 
826*e1eccf28SAndroid Build Coastguard Worker     default:
827*e1eccf28SAndroid Build Coastguard Worker         ALOGE("unimplemented\n");
828*e1eccf28SAndroid Build Coastguard Worker     }
829*e1eccf28SAndroid Build Coastguard Worker 
830*e1eccf28SAndroid Build Coastguard Worker 
831*e1eccf28SAndroid Build Coastguard Worker }
832*e1eccf28SAndroid Build Coastguard Worker 
kernelBNNM(size_t m,size_t n,size_t k,const uint8_t * a,uint8_t a_offset,size_t lda,const uint8_t * b,uint8_t b_offset,size_t ldb,uint8_t * c,int32_t c_offset,size_t ldc,int32_t c_mult_int)833*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsicBLAS::kernelBNNM(size_t m, size_t n, size_t k,
834*e1eccf28SAndroid Build Coastguard Worker                                            const uint8_t* a, uint8_t a_offset, size_t lda,
835*e1eccf28SAndroid Build Coastguard Worker                                            const uint8_t* b, uint8_t b_offset, size_t ldb,
836*e1eccf28SAndroid Build Coastguard Worker                                            uint8_t* c, int32_t c_offset, size_t ldc,
837*e1eccf28SAndroid Build Coastguard Worker                                            int32_t c_mult_int) {
838*e1eccf28SAndroid Build Coastguard Worker     const int c_shift = 21;
839*e1eccf28SAndroid Build Coastguard Worker #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS)
840*e1eccf28SAndroid Build Coastguard Worker     // Non-optimized path for ARMv7 devices without SIMD instructions.
841*e1eccf28SAndroid Build Coastguard Worker     if (!gArchUseSIMD) {
842*e1eccf28SAndroid Build Coastguard Worker         /*
843*e1eccf28SAndroid Build Coastguard Worker          * Calculations are done in 1.10.21 fixed-point format for the final output,
844*e1eccf28SAndroid Build Coastguard Worker          * just before there's a shift down to drop the fractional parts. The output
845*e1eccf28SAndroid Build Coastguard Worker          * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
846*e1eccf28SAndroid Build Coastguard Worker          * gives some headroom to avoid wrapping around on small overflows.
847*e1eccf28SAndroid Build Coastguard Worker          */
848*e1eccf28SAndroid Build Coastguard Worker         size_t i = 0, j = 0, l = 0;
849*e1eccf28SAndroid Build Coastguard Worker         for (j = 0; j < n; j++) {
850*e1eccf28SAndroid Build Coastguard Worker             for (i = 0; i < m; i++) {
851*e1eccf28SAndroid Build Coastguard Worker                 int32_t total = 0;
852*e1eccf28SAndroid Build Coastguard Worker                 for (l = 0; l < k; l++) {
853*e1eccf28SAndroid Build Coastguard Worker                     const int a_index = ((i * lda) + l);
854*e1eccf28SAndroid Build Coastguard Worker                     const uint8_t a_as_byte = a[a_index];
855*e1eccf28SAndroid Build Coastguard Worker                     const int32_t a_as_int = (((int32_t)(a_as_byte)) - a_offset);
856*e1eccf28SAndroid Build Coastguard Worker                     const int b_index = ((j * ldb) + l);
857*e1eccf28SAndroid Build Coastguard Worker                     const uint8_t b_as_byte = b[b_index];
858*e1eccf28SAndroid Build Coastguard Worker                     const int32_t b_as_int = (((int32_t)(b_as_byte)) - b_offset);
859*e1eccf28SAndroid Build Coastguard Worker                     const int32_t mult_as_int = (a_as_int * b_as_int);
860*e1eccf28SAndroid Build Coastguard Worker                     total += mult_as_int;
861*e1eccf28SAndroid Build Coastguard Worker                 }
862*e1eccf28SAndroid Build Coastguard Worker                 const int c_index = ((ldc * i) + j);
863*e1eccf28SAndroid Build Coastguard Worker                 int32_t output =
864*e1eccf28SAndroid Build Coastguard Worker                     ((((total + c_offset) * c_mult_int) + (1 << (c_shift - 1)))
865*e1eccf28SAndroid Build Coastguard Worker                      >> c_shift);
866*e1eccf28SAndroid Build Coastguard Worker                 if (output > 255) {
867*e1eccf28SAndroid Build Coastguard Worker                     output = 255;
868*e1eccf28SAndroid Build Coastguard Worker                 }
869*e1eccf28SAndroid Build Coastguard Worker                 if (output < 0) {
870*e1eccf28SAndroid Build Coastguard Worker                     output = 0;
871*e1eccf28SAndroid Build Coastguard Worker                 }
872*e1eccf28SAndroid Build Coastguard Worker                 c[c_index] = (uint8_t)(output);
873*e1eccf28SAndroid Build Coastguard Worker             }
874*e1eccf28SAndroid Build Coastguard Worker         }
875*e1eccf28SAndroid Build Coastguard Worker         return;
876*e1eccf28SAndroid Build Coastguard Worker     }
877*e1eccf28SAndroid Build Coastguard Worker #endif
878*e1eccf28SAndroid Build Coastguard Worker 
879*e1eccf28SAndroid Build Coastguard Worker     // Using gemmlowp to calculate the low precision 8 bit GEMM.
880*e1eccf28SAndroid Build Coastguard Worker     // Set MaxNumThreads to 0. The value 0 lets the implementation query
881*e1eccf28SAndroid Build Coastguard Worker     // the system to determine the number of hardware threads
882*e1eccf28SAndroid Build Coastguard Worker     gemmlowp::eight_bit_int_gemm::SetMaxNumThreads(0);
883*e1eccf28SAndroid Build Coastguard Worker 
884*e1eccf28SAndroid Build Coastguard Worker     bool transpose_a = true;
885*e1eccf28SAndroid Build Coastguard Worker     bool transpose_b = false;
886*e1eccf28SAndroid Build Coastguard Worker     bool transpose_c = true;
887*e1eccf28SAndroid Build Coastguard Worker     gemmlowp::eight_bit_int_gemm::EightBitIntGemm(transpose_a, transpose_b, transpose_c,
888*e1eccf28SAndroid Build Coastguard Worker                                                   m, n, k, a, -a_offset, lda,
889*e1eccf28SAndroid Build Coastguard Worker                                                   b, -b_offset, ldb, c, c_offset,
890*e1eccf28SAndroid Build Coastguard Worker                                                   c_mult_int, c_shift, ldc,
891*e1eccf28SAndroid Build Coastguard Worker                                                   gemmlowp::eight_bit_int_gemm::BitDepthSetting::A8B8);
892*e1eccf28SAndroid Build Coastguard Worker 
893*e1eccf28SAndroid Build Coastguard Worker }
894*e1eccf28SAndroid Build Coastguard Worker 
895*e1eccf28SAndroid Build Coastguard Worker 
896*e1eccf28SAndroid Build Coastguard Worker 
897*e1eccf28SAndroid Build Coastguard Worker 
898*e1eccf28SAndroid Build Coastguard Worker 
RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl * ctx,const Script * s)899*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx,
900*e1eccf28SAndroid Build Coastguard Worker                                                    const Script *s)
901*e1eccf28SAndroid Build Coastguard Worker             : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS) {
902*e1eccf28SAndroid Build Coastguard Worker 
903*e1eccf28SAndroid Build Coastguard Worker 
904*e1eccf28SAndroid Build Coastguard Worker }
905*e1eccf28SAndroid Build Coastguard Worker 
~RsdCpuScriptIntrinsicBLAS()906*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() {
907*e1eccf28SAndroid Build Coastguard Worker }
908*e1eccf28SAndroid Build Coastguard Worker 
rsdIntrinsic_BLAS(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)909*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
910*e1eccf28SAndroid Build Coastguard Worker                                     const Script *s, const Element *e) {
911*e1eccf28SAndroid Build Coastguard Worker 
912*e1eccf28SAndroid Build Coastguard Worker     return new RsdCpuScriptIntrinsicBLAS(ctx, s);
913*e1eccf28SAndroid Build Coastguard Worker }
914*e1eccf28SAndroid Build Coastguard Worker 
915*e1eccf28SAndroid Build Coastguard Worker } // namespace renderscript
916*e1eccf28SAndroid Build Coastguard Worker } // namespace android
917