/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*c217d954SCole Faust #include "GEMMLowp.h"
25*c217d954SCole Faust
26*c217d954SCole Faust #include "arm_compute/core/Types.h"
27*c217d954SCole Faust #include "tests/validation/reference/UtilsQuantizedAsymm.h"
28*c217d954SCole Faust
29*c217d954SCole Faust #include "support/ToolchainSupport.h"
30*c217d954SCole Faust
31*c217d954SCole Faust #include <limits>
32*c217d954SCole Faust
33*c217d954SCole Faust namespace arm_compute
34*c217d954SCole Faust {
35*c217d954SCole Faust namespace test
36*c217d954SCole Faust {
37*c217d954SCole Faust namespace validation
38*c217d954SCole Faust {
39*c217d954SCole Faust namespace reference
40*c217d954SCole Faust {
41*c217d954SCole Faust namespace
42*c217d954SCole Faust {
43*c217d954SCole Faust template <typename T>
44*c217d954SCole Faust struct DataTypeExtractor
45*c217d954SCole Faust {
data_typearm_compute::test::validation::reference::__anon8a60d57b0111::DataTypeExtractor46*c217d954SCole Faust static DataType data_type()
47*c217d954SCole Faust {
48*c217d954SCole Faust DataType data_type = DataType::UNKNOWN;
49*c217d954SCole Faust if(std::is_same<T, int8_t>::value)
50*c217d954SCole Faust {
51*c217d954SCole Faust data_type = DataType::QASYMM8_SIGNED;
52*c217d954SCole Faust }
53*c217d954SCole Faust else if(std::is_same<T, uint8_t>::value)
54*c217d954SCole Faust {
55*c217d954SCole Faust data_type = DataType::QASYMM8;
56*c217d954SCole Faust }
57*c217d954SCole Faust else if(std::is_same<T, int16_t>::value)
58*c217d954SCole Faust {
59*c217d954SCole Faust data_type = DataType::QSYMM16;
60*c217d954SCole Faust }
61*c217d954SCole Faust return data_type;
62*c217d954SCole Faust }
63*c217d954SCole Faust };
64*c217d954SCole Faust
65*c217d954SCole Faust template <typename TIn, typename TOut>
quantize_down_scale(const SimpleTensor<TIn> * in,const SimpleTensor<TIn> * bias,SimpleTensor<TOut> * dst,int32_t result_offset,std::vector<int32_t> result_mult_int,std::vector<int32_t> result_shift,int32_t min,int32_t max)66*c217d954SCole Faust void quantize_down_scale(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
67*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t min, int32_t max)
68*c217d954SCole Faust {
69*c217d954SCole Faust const int cols_in = in->shape().x();
70*c217d954SCole Faust const bool is_per_channel = result_mult_int.size() > 1;
71*c217d954SCole Faust
72*c217d954SCole Faust #if defined(_OPENMP)
73*c217d954SCole Faust #pragma omp parallel for
74*c217d954SCole Faust #endif /* _OPENMP */
75*c217d954SCole Faust for(int i = 0; i < in->num_elements(); ++i)
76*c217d954SCole Faust {
77*c217d954SCole Faust int32_t result = ((*in)[i] + result_offset);
78*c217d954SCole Faust
79*c217d954SCole Faust if(bias != nullptr)
80*c217d954SCole Faust {
81*c217d954SCole Faust result += (*bias)[i % cols_in];
82*c217d954SCole Faust }
83*c217d954SCole Faust
84*c217d954SCole Faust result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
85*c217d954SCole Faust
86*c217d954SCole Faust result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
87*c217d954SCole Faust
88*c217d954SCole Faust // Bounded ReLu
89*c217d954SCole Faust if(min != max)
90*c217d954SCole Faust {
91*c217d954SCole Faust result = std::max(min, std::min(max, result));
92*c217d954SCole Faust }
93*c217d954SCole Faust
94*c217d954SCole Faust (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
95*c217d954SCole Faust std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
96*c217d954SCole Faust }
97*c217d954SCole Faust }
98*c217d954SCole Faust
99*c217d954SCole Faust template <typename TIn, typename TOut>
quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> * in,const SimpleTensor<TIn> * bias,SimpleTensor<TOut> * dst,std::vector<int32_t> result_fixedpoint_multiplier,std::vector<int32_t> result_shift,int32_t result_offset_after_shift,int32_t min,int32_t max)100*c217d954SCole Faust void quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
101*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
102*c217d954SCole Faust {
103*c217d954SCole Faust const int cols_in = in->shape().x();
104*c217d954SCole Faust const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
105*c217d954SCole Faust
106*c217d954SCole Faust #if defined(_OPENMP)
107*c217d954SCole Faust #pragma omp parallel for
108*c217d954SCole Faust #endif /* _OPENMP */
109*c217d954SCole Faust for(int i = 0; i < in->num_elements(); ++i)
110*c217d954SCole Faust {
111*c217d954SCole Faust TIn result = (*in)[i];
112*c217d954SCole Faust
113*c217d954SCole Faust if(bias != nullptr)
114*c217d954SCole Faust {
115*c217d954SCole Faust result += (*bias)[i % cols_in];
116*c217d954SCole Faust }
117*c217d954SCole Faust
118*c217d954SCole Faust // Fixed point multiplication
119*c217d954SCole Faust const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
120*c217d954SCole Faust const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
121*c217d954SCole Faust
122*c217d954SCole Faust if(shift < 0)
123*c217d954SCole Faust {
124*c217d954SCole Faust result = asymm_int_mult(result * (1 << (-shift)), multiplier);
125*c217d954SCole Faust }
126*c217d954SCole Faust else
127*c217d954SCole Faust {
128*c217d954SCole Faust result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
129*c217d954SCole Faust }
130*c217d954SCole Faust result += result_offset_after_shift;
131*c217d954SCole Faust
132*c217d954SCole Faust // Bounded ReLu
133*c217d954SCole Faust if(min != max)
134*c217d954SCole Faust {
135*c217d954SCole Faust result = std::max(min, std::min(max, result));
136*c217d954SCole Faust }
137*c217d954SCole Faust
138*c217d954SCole Faust (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
139*c217d954SCole Faust std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
140*c217d954SCole Faust }
141*c217d954SCole Faust }
142*c217d954SCole Faust
143*c217d954SCole Faust template <typename TIn, typename TOut>
quantize_down_scale_by_float(const SimpleTensor<TIn> * in,const SimpleTensor<TIn> * bias,SimpleTensor<TOut> * dst,std::vector<float_t> result_real_multiplier,int32_t result_offset,int32_t min,int32_t max)144*c217d954SCole Faust void quantize_down_scale_by_float(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<float_t> result_real_multiplier,
145*c217d954SCole Faust int32_t result_offset, int32_t min, int32_t max)
146*c217d954SCole Faust {
147*c217d954SCole Faust const int cols_in = in->shape().x();
148*c217d954SCole Faust const bool is_per_channel = result_real_multiplier.size() > 1;
149*c217d954SCole Faust
150*c217d954SCole Faust #if defined(_OPENMP)
151*c217d954SCole Faust #pragma omp parallel for
152*c217d954SCole Faust #endif /* _OPENMP */
153*c217d954SCole Faust for(int i = 0; i < in->num_elements(); ++i)
154*c217d954SCole Faust {
155*c217d954SCole Faust TIn result = (*in)[i];
156*c217d954SCole Faust
157*c217d954SCole Faust if(bias != nullptr)
158*c217d954SCole Faust {
159*c217d954SCole Faust result += (*bias)[i % cols_in];
160*c217d954SCole Faust }
161*c217d954SCole Faust
162*c217d954SCole Faust // Float multiplication
163*c217d954SCole Faust const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
164*c217d954SCole Faust
165*c217d954SCole Faust float_t result_f = static_cast<float_t>(result) * multiplier + static_cast<float_t>(result_offset);
166*c217d954SCole Faust result = static_cast<TIn>(support::cpp11::round(result_f));
167*c217d954SCole Faust
168*c217d954SCole Faust // Bounded ReLu
169*c217d954SCole Faust if(min != max)
170*c217d954SCole Faust {
171*c217d954SCole Faust result = std::max(min, std::min(max, result));
172*c217d954SCole Faust }
173*c217d954SCole Faust
174*c217d954SCole Faust (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
175*c217d954SCole Faust std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
176*c217d954SCole Faust }
177*c217d954SCole Faust }
178*c217d954SCole Faust } // namespace
179*c217d954SCole Faust
180*c217d954SCole Faust template <typename T_out, typename T_in, typename T_in_1>
gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> & a,const SimpleTensor<T_in_1> & b,TensorShape shape_c,int32_t a_offset,int32_t b_offset)181*c217d954SCole Faust SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
182*c217d954SCole Faust {
183*c217d954SCole Faust static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
184*c217d954SCole Faust
185*c217d954SCole Faust DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
186*c217d954SCole Faust SimpleTensor<T_out> c(shape_c, dt);
187*c217d954SCole Faust
188*c217d954SCole Faust const int K = a.shape().x();
189*c217d954SCole Faust const int M = a.shape().y();
190*c217d954SCole Faust const int N = b.shape().x();
191*c217d954SCole Faust const int D = a.shape().z(); // Number of matrices in a batch
192*c217d954SCole Faust
193*c217d954SCole Faust const int a_stride_z = K * M;
194*c217d954SCole Faust // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
195*c217d954SCole Faust const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
196*c217d954SCole Faust const int c_stride_z = N * M;
197*c217d954SCole Faust
198*c217d954SCole Faust std::vector<T_out> acc;
199*c217d954SCole Faust acc.resize(N);
200*c217d954SCole Faust
201*c217d954SCole Faust for(int depth = 0; depth < D; ++depth)
202*c217d954SCole Faust {
203*c217d954SCole Faust const int base_addr_a = depth * a_stride_z;
204*c217d954SCole Faust const int base_addr_b = depth * b_stride_z;
205*c217d954SCole Faust const int base_addr_c = depth * c_stride_z;
206*c217d954SCole Faust
207*c217d954SCole Faust for(int i = 0; i < M; ++i)
208*c217d954SCole Faust {
209*c217d954SCole Faust for(int j = 0; j < N; ++j)
210*c217d954SCole Faust {
211*c217d954SCole Faust acc[j] = 0;
212*c217d954SCole Faust }
213*c217d954SCole Faust for(int k = 0; k < K; ++k)
214*c217d954SCole Faust {
215*c217d954SCole Faust const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
216*c217d954SCole Faust for(int j = 0; j < N; ++j)
217*c217d954SCole Faust {
218*c217d954SCole Faust const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
219*c217d954SCole Faust const T_out mult_as_int = tmp_a * tmp_b;
220*c217d954SCole Faust acc[j] += mult_as_int;
221*c217d954SCole Faust }
222*c217d954SCole Faust }
223*c217d954SCole Faust for(int j = 0; j < N; ++j)
224*c217d954SCole Faust {
225*c217d954SCole Faust c[base_addr_c + j + i * N] = acc[j];
226*c217d954SCole Faust }
227*c217d954SCole Faust }
228*c217d954SCole Faust }
229*c217d954SCole Faust
230*c217d954SCole Faust return c;
231*c217d954SCole Faust }
232*c217d954SCole Faust
233*c217d954SCole Faust // used to validate assembly kernels which don't know anything about offsets
234*c217d954SCole Faust template <typename T1, typename T2, typename T3>
gemmlowp(const SimpleTensor<T2> & a,const SimpleTensor<T3> & b,TensorShape shape_c)235*c217d954SCole Faust SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
236*c217d954SCole Faust {
237*c217d954SCole Faust return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
238*c217d954SCole Faust }
239*c217d954SCole Faust
240*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale(const SimpleTensor<TIn> & in,int32_t result_offset,std::vector<int32_t> result_mult_int,std::vector<int32_t> result_shift,int32_t min,int32_t max)241*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
242*c217d954SCole Faust int32_t min, int32_t max)
243*c217d954SCole Faust {
244*c217d954SCole Faust SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
245*c217d954SCole Faust
246*c217d954SCole Faust quantize_down_scale<TIn, TOut>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
247*c217d954SCole Faust
248*c217d954SCole Faust return dst;
249*c217d954SCole Faust }
250*c217d954SCole Faust
251*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale(const SimpleTensor<TIn> & in,const SimpleTensor<TIn> & bias,int32_t result_offset,std::vector<int32_t> result_mult_int,std::vector<int32_t> result_shift,int32_t min,int32_t max)252*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
253*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t min, int32_t max)
254*c217d954SCole Faust {
255*c217d954SCole Faust SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
256*c217d954SCole Faust
257*c217d954SCole Faust quantize_down_scale<TIn, TOut>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
258*c217d954SCole Faust
259*c217d954SCole Faust return dst;
260*c217d954SCole Faust }
261*c217d954SCole Faust
262*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> & in,std::vector<int32_t> result_fixedpoint_multiplier,std::vector<int32_t> result_shift,int32_t result_offset_after_shift,int32_t min,int32_t max)263*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
264*c217d954SCole Faust int32_t result_offset_after_shift, int32_t min, int32_t max)
265*c217d954SCole Faust {
266*c217d954SCole Faust SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
267*c217d954SCole Faust
268*c217d954SCole Faust quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
269*c217d954SCole Faust
270*c217d954SCole Faust return dst;
271*c217d954SCole Faust }
272*c217d954SCole Faust
273*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> & in,const SimpleTensor<TIn> & bias,std::vector<int32_t> result_fixedpoint_multiplier,std::vector<int32_t> result_shift,int32_t result_offset_after_shift,int32_t min,int32_t max)274*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
275*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
276*c217d954SCole Faust {
277*c217d954SCole Faust SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
278*c217d954SCole Faust
279*c217d954SCole Faust quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
280*c217d954SCole Faust
281*c217d954SCole Faust return dst;
282*c217d954SCole Faust }
283*c217d954SCole Faust
284*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> & in,const SimpleTensor<TIn> & bias,std::vector<float_t> result_real_multiplier,int32_t result_offset,int32_t min,int32_t max)285*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
286*c217d954SCole Faust std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
287*c217d954SCole Faust {
288*c217d954SCole Faust SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
289*c217d954SCole Faust
290*c217d954SCole Faust quantize_down_scale_by_float<TIn, TOut>(&in, &bias, &dst, result_real_multiplier, result_offset, min, max);
291*c217d954SCole Faust
292*c217d954SCole Faust return dst;
293*c217d954SCole Faust }
294*c217d954SCole Faust
295*c217d954SCole Faust template <typename TIn, typename TOut>
gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> & in,std::vector<float_t> result_real_multiplier,int32_t result_offset,int32_t min,int32_t max)296*c217d954SCole Faust SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
297*c217d954SCole Faust std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
298*c217d954SCole Faust {
299*c217d954SCole Faust SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
300*c217d954SCole Faust
301*c217d954SCole Faust quantize_down_scale_by_float<TIn, TOut>(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max);
302*c217d954SCole Faust
303*c217d954SCole Faust return dst;
304*c217d954SCole Faust }
305*c217d954SCole Faust
306*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
307*c217d954SCole Faust std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
308*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
309*c217d954SCole Faust std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
310*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
311*c217d954SCole Faust std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
312*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
313*c217d954SCole Faust std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
314*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
315*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
316*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
317*c217d954SCole Faust std::vector<int32_t> result_fixedpoint_multiplier,
318*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
319*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
320*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
321*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
322*c217d954SCole Faust std::vector<int32_t> result_fixedpoint_multiplier,
323*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
324*c217d954SCole Faust template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
325*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
326*c217d954SCole Faust template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
327*c217d954SCole Faust std::vector<int32_t> result_fixedpoint_multiplier,
328*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
329*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
330*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t min, int32_t max);
331*c217d954SCole Faust template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
332*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t min, int32_t max);
333*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
334*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t min, int32_t max);
335*c217d954SCole Faust template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
336*c217d954SCole Faust std::vector<int32_t> result_shift, int32_t min, int32_t max);
337*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
338*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
339*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
340*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
341*c217d954SCole Faust template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
342*c217d954SCole Faust } // namespace reference
343*c217d954SCole Faust } // namespace validation
344*c217d954SCole Faust } // namespace test
345*c217d954SCole Faust } // namespace arm_compute
346