xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/reference/GEMMLowp.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*c217d954SCole Faust #include "GEMMLowp.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "arm_compute/core/Types.h"
27*c217d954SCole Faust #include "tests/validation/reference/UtilsQuantizedAsymm.h"
28*c217d954SCole Faust 
29*c217d954SCole Faust #include "support/ToolchainSupport.h"
30*c217d954SCole Faust 
31*c217d954SCole Faust #include <limits>
32*c217d954SCole Faust 
33*c217d954SCole Faust namespace arm_compute
34*c217d954SCole Faust {
35*c217d954SCole Faust namespace test
36*c217d954SCole Faust {
37*c217d954SCole Faust namespace validation
38*c217d954SCole Faust {
39*c217d954SCole Faust namespace reference
40*c217d954SCole Faust {
41*c217d954SCole Faust namespace
42*c217d954SCole Faust {
43*c217d954SCole Faust template <typename T>
44*c217d954SCole Faust struct DataTypeExtractor
45*c217d954SCole Faust {
data_typearm_compute::test::validation::reference::__anon8a60d57b0111::DataTypeExtractor46*c217d954SCole Faust     static DataType data_type()
47*c217d954SCole Faust     {
48*c217d954SCole Faust         DataType data_type = DataType::UNKNOWN;
49*c217d954SCole Faust         if(std::is_same<T, int8_t>::value)
50*c217d954SCole Faust         {
51*c217d954SCole Faust             data_type = DataType::QASYMM8_SIGNED;
52*c217d954SCole Faust         }
53*c217d954SCole Faust         else if(std::is_same<T, uint8_t>::value)
54*c217d954SCole Faust         {
55*c217d954SCole Faust             data_type = DataType::QASYMM8;
56*c217d954SCole Faust         }
57*c217d954SCole Faust         else if(std::is_same<T, int16_t>::value)
58*c217d954SCole Faust         {
59*c217d954SCole Faust             data_type = DataType::QSYMM16;
60*c217d954SCole Faust         }
61*c217d954SCole Faust         return data_type;
62*c217d954SCole Faust     }
63*c217d954SCole Faust };
64*c217d954SCole Faust 
65*c217d954SCole Faust template <typename TIn, typename TOut>
quantize_down_scale(const SimpleTensor<TIn> * in,const SimpleTensor<TIn> * bias,SimpleTensor<TOut> * dst,int32_t result_offset,std::vector<int32_t> result_mult_int,std::vector<int32_t> result_shift,int32_t min,int32_t max)66*c217d954SCole Faust void quantize_down_scale(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
67*c217d954SCole Faust                          std::vector<int32_t> result_shift, int32_t min, int32_t max)
68*c217d954SCole Faust {
69*c217d954SCole Faust     const int  cols_in        = in->shape().x();
70*c217d954SCole Faust     const bool is_per_channel = result_mult_int.size() > 1;
71*c217d954SCole Faust 
72*c217d954SCole Faust #if defined(_OPENMP)
73*c217d954SCole Faust     #pragma omp parallel for
74*c217d954SCole Faust #endif /* _OPENMP */
75*c217d954SCole Faust     for(int i = 0; i < in->num_elements(); ++i)
76*c217d954SCole Faust     {
77*c217d954SCole Faust         int32_t result = ((*in)[i] + result_offset);
78*c217d954SCole Faust 
79*c217d954SCole Faust         if(bias != nullptr)
80*c217d954SCole Faust         {
81*c217d954SCole Faust             result += (*bias)[i % cols_in];
82*c217d954SCole Faust         }
83*c217d954SCole Faust 
84*c217d954SCole Faust         result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
85*c217d954SCole Faust 
86*c217d954SCole Faust         result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
87*c217d954SCole Faust 
88*c217d954SCole Faust         // Bounded ReLu
89*c217d954SCole Faust         if(min != max)
90*c217d954SCole Faust         {
91*c217d954SCole Faust             result = std::max(min, std::min(max, result));
92*c217d954SCole Faust         }
93*c217d954SCole Faust 
94*c217d954SCole Faust         (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
95*c217d954SCole Faust                                                     std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
96*c217d954SCole Faust     }
97*c217d954SCole Faust }
98*c217d954SCole Faust 
99*c217d954SCole Faust template <typename TIn, typename TOut>
quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> * in,const SimpleTensor<TIn> * bias,SimpleTensor<TOut> * dst,std::vector<int32_t> result_fixedpoint_multiplier,std::vector<int32_t> result_shift,int32_t result_offset_after_shift,int32_t min,int32_t max)100*c217d954SCole Faust void quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
101*c217d954SCole Faust                                        std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
102*c217d954SCole Faust {
103*c217d954SCole Faust     const int  cols_in        = in->shape().x();
104*c217d954SCole Faust     const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
105*c217d954SCole Faust 
106*c217d954SCole Faust #if defined(_OPENMP)
107*c217d954SCole Faust     #pragma omp parallel for
108*c217d954SCole Faust #endif /* _OPENMP */
109*c217d954SCole Faust     for(int i = 0; i < in->num_elements(); ++i)
110*c217d954SCole Faust     {
111*c217d954SCole Faust         TIn result = (*in)[i];
112*c217d954SCole Faust 
113*c217d954SCole Faust         if(bias != nullptr)
114*c217d954SCole Faust         {
115*c217d954SCole Faust             result += (*bias)[i % cols_in];
116*c217d954SCole Faust         }
117*c217d954SCole Faust 
118*c217d954SCole Faust         // Fixed point multiplication
119*c217d954SCole Faust         const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
120*c217d954SCole Faust         const int32_t shift      = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
121*c217d954SCole Faust 
122*c217d954SCole Faust         if(shift < 0)
123*c217d954SCole Faust         {
124*c217d954SCole Faust             result = asymm_int_mult(result * (1 << (-shift)), multiplier);
125*c217d954SCole Faust         }
126*c217d954SCole Faust         else
127*c217d954SCole Faust         {
128*c217d954SCole Faust             result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
129*c217d954SCole Faust         }
130*c217d954SCole Faust         result += result_offset_after_shift;
131*c217d954SCole Faust 
132*c217d954SCole Faust         // Bounded ReLu
133*c217d954SCole Faust         if(min != max)
134*c217d954SCole Faust         {
135*c217d954SCole Faust             result = std::max(min, std::min(max, result));
136*c217d954SCole Faust         }
137*c217d954SCole Faust 
138*c217d954SCole Faust         (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
139*c217d954SCole Faust                                                     std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
140*c217d954SCole Faust     }
141*c217d954SCole Faust }
142*c217d954SCole Faust 
143*c217d954SCole Faust template <typename TIn, typename TOut>
quantize_down_scale_by_float(const SimpleTensor<TIn> * in,const SimpleTensor<TIn> * bias,SimpleTensor<TOut> * dst,std::vector<float_t> result_real_multiplier,int32_t result_offset,int32_t min,int32_t max)144*c217d954SCole Faust void quantize_down_scale_by_float(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<float_t> result_real_multiplier,
145*c217d954SCole Faust                                   int32_t result_offset, int32_t min, int32_t max)
146*c217d954SCole Faust {
147*c217d954SCole Faust     const int  cols_in        = in->shape().x();
148*c217d954SCole Faust     const bool is_per_channel = result_real_multiplier.size() > 1;
149*c217d954SCole Faust 
150*c217d954SCole Faust #if defined(_OPENMP)
151*c217d954SCole Faust     #pragma omp parallel for
152*c217d954SCole Faust #endif /* _OPENMP */
153*c217d954SCole Faust     for(int i = 0; i < in->num_elements(); ++i)
154*c217d954SCole Faust     {
155*c217d954SCole Faust         TIn result = (*in)[i];
156*c217d954SCole Faust 
157*c217d954SCole Faust         if(bias != nullptr)
158*c217d954SCole Faust         {
159*c217d954SCole Faust             result += (*bias)[i % cols_in];
160*c217d954SCole Faust         }
161*c217d954SCole Faust 
162*c217d954SCole Faust         // Float multiplication
163*c217d954SCole Faust         const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
164*c217d954SCole Faust 
165*c217d954SCole Faust         float_t result_f = static_cast<float_t>(result) * multiplier + static_cast<float_t>(result_offset);
166*c217d954SCole Faust         result           = static_cast<TIn>(support::cpp11::round(result_f));
167*c217d954SCole Faust 
168*c217d954SCole Faust         // Bounded ReLu
169*c217d954SCole Faust         if(min != max)
170*c217d954SCole Faust         {
171*c217d954SCole Faust             result = std::max(min, std::min(max, result));
172*c217d954SCole Faust         }
173*c217d954SCole Faust 
174*c217d954SCole Faust         (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
175*c217d954SCole Faust                                                     std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
176*c217d954SCole Faust     }
177*c217d954SCole Faust }
178*c217d954SCole Faust } // namespace
179*c217d954SCole Faust 
180*c217d954SCole Faust template <typename T_out, typename T_in, typename T_in_1>
gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> & a,const SimpleTensor<T_in_1> & b,TensorShape shape_c,int32_t a_offset,int32_t b_offset)181*c217d954SCole Faust SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
182*c217d954SCole Faust {
183*c217d954SCole Faust     static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
184*c217d954SCole Faust 
185*c217d954SCole Faust     DataType            dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
186*c217d954SCole Faust     SimpleTensor<T_out> c(shape_c, dt);
187*c217d954SCole Faust 
188*c217d954SCole Faust     const int K = a.shape().x();
189*c217d954SCole Faust     const int M = a.shape().y();
190*c217d954SCole Faust     const int N = b.shape().x();
191*c217d954SCole Faust     const int D = a.shape().z(); // Number of matrices in a batch
192*c217d954SCole Faust 
193*c217d954SCole Faust     const int a_stride_z = K * M;
194*c217d954SCole Faust     // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
195*c217d954SCole Faust     const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
196*c217d954SCole Faust     const int c_stride_z = N * M;
197*c217d954SCole Faust 
198*c217d954SCole Faust     std::vector<T_out> acc;
199*c217d954SCole Faust     acc.resize(N);
200*c217d954SCole Faust 
201*c217d954SCole Faust     for(int depth = 0; depth < D; ++depth)
202*c217d954SCole Faust     {
203*c217d954SCole Faust         const int base_addr_a = depth * a_stride_z;
204*c217d954SCole Faust         const int base_addr_b = depth * b_stride_z;
205*c217d954SCole Faust         const int base_addr_c = depth * c_stride_z;
206*c217d954SCole Faust 
207*c217d954SCole Faust         for(int i = 0; i < M; ++i)
208*c217d954SCole Faust         {
209*c217d954SCole Faust             for(int j = 0; j < N; ++j)
210*c217d954SCole Faust             {
211*c217d954SCole Faust                 acc[j] = 0;
212*c217d954SCole Faust             }
213*c217d954SCole Faust             for(int k = 0; k < K; ++k)
214*c217d954SCole Faust             {
215*c217d954SCole Faust                 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
216*c217d954SCole Faust                 for(int j = 0; j < N; ++j)
217*c217d954SCole Faust                 {
218*c217d954SCole Faust                     const T_out tmp_b       = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
219*c217d954SCole Faust                     const T_out mult_as_int = tmp_a * tmp_b;
220*c217d954SCole Faust                     acc[j] += mult_as_int;
221*c217d954SCole Faust                 }
222*c217d954SCole Faust             }
223*c217d954SCole Faust             for(int j = 0; j < N; ++j)
224*c217d954SCole Faust             {
225*c217d954SCole Faust                 c[base_addr_c + j + i * N] = acc[j];
226*c217d954SCole Faust             }
227*c217d954SCole Faust         }
228*c217d954SCole Faust     }
229*c217d954SCole Faust 
230*c217d954SCole Faust     return c;
231*c217d954SCole Faust }
232*c217d954SCole Faust 
233*c217d954SCole Faust // used to validate assembly kernels which don't know anything about offsets
234*c217d954SCole Faust template <typename T1, typename T2, typename T3>
gemmlowp(const SimpleTensor<T2> & a,const SimpleTensor<T3> & b,TensorShape shape_c)235*c217d954SCole Faust SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
236*c217d954SCole Faust {
237*c217d954SCole Faust     return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
238*c217d954SCole Faust }
239*c217d954SCole Faust 
240*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale(const SimpleTensor<TIn> & in,int32_t result_offset,std::vector<int32_t> result_mult_int,std::vector<int32_t> result_shift,int32_t min,int32_t max)241*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
242*c217d954SCole Faust                                                 int32_t min, int32_t max)
243*c217d954SCole Faust {
244*c217d954SCole Faust     SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
245*c217d954SCole Faust 
246*c217d954SCole Faust     quantize_down_scale<TIn, TOut>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
247*c217d954SCole Faust 
248*c217d954SCole Faust     return dst;
249*c217d954SCole Faust }
250*c217d954SCole Faust 
251*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale(const SimpleTensor<TIn> & in,const SimpleTensor<TIn> & bias,int32_t result_offset,std::vector<int32_t> result_mult_int,std::vector<int32_t> result_shift,int32_t min,int32_t max)252*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
253*c217d954SCole Faust                                                 std::vector<int32_t> result_shift, int32_t min, int32_t max)
254*c217d954SCole Faust {
255*c217d954SCole Faust     SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
256*c217d954SCole Faust 
257*c217d954SCole Faust     quantize_down_scale<TIn, TOut>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
258*c217d954SCole Faust 
259*c217d954SCole Faust     return dst;
260*c217d954SCole Faust }
261*c217d954SCole Faust 
262*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> & in,std::vector<int32_t> result_fixedpoint_multiplier,std::vector<int32_t> result_shift,int32_t result_offset_after_shift,int32_t min,int32_t max)263*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
264*c217d954SCole Faust                                                               int32_t result_offset_after_shift, int32_t min, int32_t max)
265*c217d954SCole Faust {
266*c217d954SCole Faust     SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
267*c217d954SCole Faust 
268*c217d954SCole Faust     quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
269*c217d954SCole Faust 
270*c217d954SCole Faust     return dst;
271*c217d954SCole Faust }
272*c217d954SCole Faust 
273*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> & in,const SimpleTensor<TIn> & bias,std::vector<int32_t> result_fixedpoint_multiplier,std::vector<int32_t> result_shift,int32_t result_offset_after_shift,int32_t min,int32_t max)274*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
275*c217d954SCole Faust                                                               std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
276*c217d954SCole Faust {
277*c217d954SCole Faust     SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
278*c217d954SCole Faust 
279*c217d954SCole Faust     quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
280*c217d954SCole Faust 
281*c217d954SCole Faust     return dst;
282*c217d954SCole Faust }
283*c217d954SCole Faust 
284*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> & in,const SimpleTensor<TIn> & bias,std::vector<float_t> result_real_multiplier,int32_t result_offset,int32_t min,int32_t max)285*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
286*c217d954SCole Faust                                                          std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
287*c217d954SCole Faust {
288*c217d954SCole Faust     SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
289*c217d954SCole Faust 
290*c217d954SCole Faust     quantize_down_scale_by_float<TIn, TOut>(&in, &bias, &dst, result_real_multiplier, result_offset, min, max);
291*c217d954SCole Faust 
292*c217d954SCole Faust     return dst;
293*c217d954SCole Faust }
294*c217d954SCole Faust 
295*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> & in,std::vector<float_t> result_real_multiplier,int32_t result_offset,int32_t min,int32_t max)296*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
297*c217d954SCole Faust                                                          std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
298*c217d954SCole Faust {
299*c217d954SCole Faust     SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
300*c217d954SCole Faust 
301*c217d954SCole Faust     quantize_down_scale_by_float<TIn, TOut>(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max);
302*c217d954SCole Faust 
303*c217d954SCole Faust     return dst;
304*c217d954SCole Faust }
305*c217d954SCole Faust 
306*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
307*c217d954SCole Faust                                                                      std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
308*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
309*c217d954SCole Faust                                                                      std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
310*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
311*c217d954SCole Faust                                                                     std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
312*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
313*c217d954SCole Faust                                                                     std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
314*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
315*c217d954SCole Faust                                                                           std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
316*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
317*c217d954SCole Faust                                                                           std::vector<int32_t> result_fixedpoint_multiplier,
318*c217d954SCole Faust                                                                           std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
319*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
320*c217d954SCole Faust                                                                          std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
321*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
322*c217d954SCole Faust                                                                          std::vector<int32_t> result_fixedpoint_multiplier,
323*c217d954SCole Faust                                                                          std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
324*c217d954SCole Faust template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
325*c217d954SCole Faust                                                                           std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
326*c217d954SCole Faust template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
327*c217d954SCole Faust                                                                           std::vector<int32_t> result_fixedpoint_multiplier,
328*c217d954SCole Faust                                                                           std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
329*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
330*c217d954SCole Faust                                                             std::vector<int32_t> result_shift, int32_t min, int32_t max);
331*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
332*c217d954SCole Faust                                                             std::vector<int32_t> result_shift, int32_t min, int32_t max);
333*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
334*c217d954SCole Faust                                                            std::vector<int32_t> result_shift, int32_t min, int32_t max);
335*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
336*c217d954SCole Faust                                                            std::vector<int32_t> result_shift, int32_t min, int32_t max);
337*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
338*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
339*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
340*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
341*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
342*c217d954SCole Faust } // namespace reference
343*c217d954SCole Faust } // namespace validation
344*c217d954SCole Faust } // namespace test
345*c217d954SCole Faust } // namespace arm_compute
346