/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace py = pybind11;
namespace executorch {
namespace backends {
namespace qnn {
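// Translates a Python quantization-info dict into the matching
// QuantizeParamsWrapper. The keys read from `quant_info` depend on
// `encoding`: "scale"/"offset" for SCALE_OFFSET, plus "bitwidth" for
// BW_SCALE_OFFSET; "axis"/"scale_offset" for AXIS_SCALE_OFFSET, plus
// "bitwidth" for BW_AXIS_SCALE_OFFSET. An unrecognized encoding logs an
// error and yields a null wrapper.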
std::unique_ptr<QuantizeParamsWrapper> CreateQuantizationParamWrapper(
    const Qnn_QuantizationEncoding_t& encoding,
    py::dict& quant_info) {
  std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper;
  if (encoding == QNN_QUANTIZATION_ENCODING_UNDEFINED) {
    quantize_param_wrapper = std::make_unique<UndefinedQuantizeParamsWrapper>();
  } else if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
    int32_t axis = quant_info["axis"].cast<int32_t>();
    std::vector<Qnn_ScaleOffset_t> scale_offset =
        quant_info["scale_offset"].cast<std::vector<Qnn_ScaleOffset_t>>();

    quantize_param_wrapper =
        std::make_unique<AxisScaleOffsetQuantizeParamsWrapper>(
            axis, scale_offset);
  } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) {
    uint32_t bitwidth = quant_info["bitwidth"].cast<uint32_t>();
    int32_t axis = quant_info["axis"].cast<int32_t>();
    std::vector<Qnn_ScaleOffset_t> scale_offsets =
        quant_info["scale_offset"].cast<std::vector<Qnn_ScaleOffset_t>>();
    uint32_t num_elements = scale_offsets.size();
    std::vector<float> scales;
    std::vector<int32_t> offsets;
    for (const auto& scale_offset : scale_offsets) {
      scales.push_back(scale_offset.scale);
      offsets.push_back(scale_offset.offset);
    }
    quantize_param_wrapper =
        std::make_unique<BwAxisScaleOffsetQuantizeParamsWrapper>(
            bitwidth, axis, num_elements, scales, offsets);
  } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET) {
    uint32_t bitwidth = quant_info["bitwidth"].cast<uint32_t>();
    float scale = quant_info["scale"].cast<float>();
    int32_t offset = quant_info["offset"].cast<int32_t>();
    quantize_param_wrapper =
        std::make_unique<BwScaleOffsetQuantizeParamsWrapper>(
            bitwidth, scale, offset);
  } else if (encoding == QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
    float scale = quant_info["scale"].cast<float>();
    int32_t offset = quant_info["offset"].cast<int32_t>();
    quantize_param_wrapper =
        std::make_unique<ScaleOffsetQuantizeParamsWrapper>(scale, offset);
  } else {
    QNN_EXECUTORCH_LOG_ERROR(
        "Unknown quantization encoding: %d", encoding);
  }
  return quantize_param_wrapper;
}

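// pybind-facing overload of CreateTensorWrapper: builds the quantization
// wrapper from the Python dict and hands the numpy array's buffer to the
// lower-level CreateTensorWrapper factory. An empty `data` array results in a
// tensor wrapper with no backing data (its memory is presumably provided
// later, e.g. at execution time).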
std::shared_ptr<TensorWrapper> CreateTensorWrapper(
    const std::string& tensor_name,
    Qnn_TensorType_t tensor_type,
    Qnn_DataType_t data_type,
    const Qnn_QuantizationEncoding_t& encoding,
    py::dict& quant_info,
    std::uint32_t rank,
    const std::vector<uint32_t>& dims,
    py::array& data,
    bool copy_data) {
  std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper =
      CreateQuantizationParamWrapper(encoding, quant_info);

  if (data.size() == 0) {
    return CreateTensorWrapper(
        tensor_name,
        tensor_type,
        data_type,
        std::move(quantize_param_wrapper),
        rank,
        dims.data(),
        0,
        nullptr,
        copy_data);
  }
  return CreateTensorWrapper(
      tensor_name,
      tensor_type,
      data_type,
      std::move(quantize_param_wrapper),
      rank,
      dims.data(),
      0,
      data.data(),
      copy_data);
}

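// Module definition exposing the wrapper types above to Python. A rough usage
// sketch follows (the import path, the op package/type strings, and the exact
// Python-side signatures of the Add* methods are illustrative assumptions,
// not taken from this file):
//
//   import numpy as np
//   from executorch.backends.qualcomm.python import PyQnnWrapperAdaptor as qnn
//
//   tensor = qnn.TensorWrapper(
//       "input_0",
//       qnn.Qnn_TensorType_t.QNN_TENSOR_TYPE_APP_WRITE,
//       qnn.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32,
//       qnn.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_UNDEFINED,
//       {},                                  # quant_info
//       2,                                   # rank
//       [1, 8],                              # dims
//       np.zeros((1, 8), dtype=np.float32),  # data
//       False)                               # copy_data
//
//   op = qnn.PyQnnOpWrapper("add_0", "qti.aisw", "ElementWiseAdd")
//   op.AddInputTensors([tensor])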
PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
  PYBIND11_NUMPY_DTYPE(PyQnnTensorWrapper::EncodingData, scale, offset);

  py::enum_<Qnn_TensorType_t>(m, "Qnn_TensorType_t")
      .value(
          "QNN_TENSOR_TYPE_APP_WRITE",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_APP_WRITE)
      .value(
          "QNN_TENSOR_TYPE_APP_READ",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_APP_READ)
      .value(
          "QNN_TENSOR_TYPE_APP_READWRITE",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_APP_READWRITE)
      .value("QNN_TENSOR_TYPE_NATIVE", Qnn_TensorType_t::QNN_TENSOR_TYPE_NATIVE)
      .value("QNN_TENSOR_TYPE_STATIC", Qnn_TensorType_t::QNN_TENSOR_TYPE_STATIC)
      .value("QNN_TENSOR_TYPE_NULL", Qnn_TensorType_t::QNN_TENSOR_TYPE_NULL)
      .value(
          "QNN_TENSOR_TYPE_UNDEFINED",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_UNDEFINED)
      .export_values();

  py::enum_<Qnn_DataType_t>(m, "Qnn_DataType_t")
      .value("QNN_DATATYPE_INT_8", Qnn_DataType_t::QNN_DATATYPE_INT_8)
      .value("QNN_DATATYPE_INT_16", Qnn_DataType_t::QNN_DATATYPE_INT_16)
      .value("QNN_DATATYPE_INT_32", Qnn_DataType_t::QNN_DATATYPE_INT_32)
      .value("QNN_DATATYPE_INT_64", Qnn_DataType_t::QNN_DATATYPE_INT_64)
      .value("QNN_DATATYPE_UINT_8", Qnn_DataType_t::QNN_DATATYPE_UINT_8)
      .value("QNN_DATATYPE_UINT_16", Qnn_DataType_t::QNN_DATATYPE_UINT_16)
      .value("QNN_DATATYPE_UINT_32", Qnn_DataType_t::QNN_DATATYPE_UINT_32)
      .value("QNN_DATATYPE_UINT_64", Qnn_DataType_t::QNN_DATATYPE_UINT_64)
      .value("QNN_DATATYPE_FLOAT_16", Qnn_DataType_t::QNN_DATATYPE_FLOAT_16)
      .value("QNN_DATATYPE_FLOAT_32", Qnn_DataType_t::QNN_DATATYPE_FLOAT_32)
      .value(
          "QNN_DATATYPE_SFIXED_POINT_8",
          Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_8)
      .value(
          "QNN_DATATYPE_SFIXED_POINT_16",
          Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_16)
      .value(
          "QNN_DATATYPE_SFIXED_POINT_32",
          Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_32)
      .value(
          "QNN_DATATYPE_UFIXED_POINT_8",
          Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_8)
      .value(
          "QNN_DATATYPE_UFIXED_POINT_16",
          Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16)
      .value(
          "QNN_DATATYPE_UFIXED_POINT_32",
          Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_32)
      .value("QNN_DATATYPE_BOOL_8", Qnn_DataType_t::QNN_DATATYPE_BOOL_8)
      .value("QNN_DATATYPE_UNDEFINED", Qnn_DataType_t::QNN_DATATYPE_UNDEFINED)
      .export_values();

  py::enum_<Qnn_QuantizationEncoding_t>(m, "Qnn_QuantizationEncoding_t")
      .value(
          "QNN_QUANTIZATION_ENCODING_UNDEFINED",
          Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_UNDEFINED)
      .value(
          "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_SCALE_OFFSET)
      .value(
          "QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::
              QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET)
      .value(
          "QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET)
      .value(
          "QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::
              QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET)
      .export_values();
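
  // Class bindings: OpWrapper and TensorWrapper are the underlying C++
  // wrapper types; the PyQnn* classes below wrap them for construction and
  // inspection from Python.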
  py::class_<OpWrapper, std::shared_ptr<OpWrapper>>(m, "OpWrapper")
      .def(py::init<
           const std::string&,
           const std::string&,
           const std::string&>());

  py::class_<TensorWrapper, std::shared_ptr<TensorWrapper>>(m, "TensorWrapper")
      .def(py::init(py::overload_cast<
                    const std::string&,
                    Qnn_TensorType_t,
                    Qnn_DataType_t,
                    const Qnn_QuantizationEncoding_t&,
                    py::dict&,
                    std::uint32_t,
                    const std::vector<uint32_t>&,
                    py::array&,
                    bool>(&CreateTensorWrapper)));

  py::class_<QuantizeParamsWrapper>(m, "QuantizeParamsWrapper");

  py::class_<Qnn_ScaleOffset_t>(m, "Qnn_ScaleOffset_t")
      .def(py::init<float, int32_t>());

  py::class_<PyQnnOpWrapper, std::shared_ptr<PyQnnOpWrapper>>(
      m, "PyQnnOpWrapper")
      .def(py::init<
           const std::string&,
           const std::string&,
           const std::string&>())
      .def(
          "AddInputTensors",
          &PyQnnOpWrapper::AddInputTensors,
          "Adds input tensor wrappers to the op wrapper",
          py::arg("tensors"))
      .def(
          "AddOutputTensors",
          &PyQnnOpWrapper::AddOutputTensors,
          "Adds output tensor wrappers to the op wrapper",
          py::arg("tensors"))
      .def(
          "AddTensorParam",
          &PyQnnOpWrapper::AddTensorParam,
          "Adds a tensor parameter to the op wrapper",
          py::arg("name"),
          py::arg("data_type"),
          py::arg("rank"),
          py::arg("dims"),
          py::arg("data"),
          py::arg("copy_data"))
      .def(
          "AddScalarParam",
          &PyQnnOpWrapper::AddScalarParam,
          "Adds a scalar parameter to the op wrapper",
          py::arg("name"),
          py::arg("data_type"),
          py::arg("attrData"))
      .def(
          "GetOpWrapper",
          &PyQnnOpWrapper::GetOpWrapper,
          "Returns the underlying op wrapper");

  py::class_<PyQnnTensorWrapper::Encoding>(m, "Encoding")
      .def_readonly("data", &PyQnnTensorWrapper::Encoding::data)
      .def_readonly("axis", &PyQnnTensorWrapper::Encoding::axis);

  py::class_<PyQnnTensorWrapper, std::shared_ptr<PyQnnTensorWrapper>>(
      m, "PyQnnTensorWrapper")
      .def(py::init<const std::shared_ptr<TensorWrapper>&>())
      .def("GetDims", &PyQnnTensorWrapper::GetDims)
      .def("GetDataType", &PyQnnTensorWrapper::GetDataType)
      .def("GetName", &PyQnnTensorWrapper::GetName)
      .def("GetEncodings", &PyQnnTensorWrapper::GetEncodings);
}
} // namespace qnn
} // namespace backends
} // namespace executorch