1 /*
2 * Copyright (c) Qualcomm Innovation Center, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8 #include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
9 #include <executorch/backends/qualcomm/runtime/Logging.h>
10 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
11 #include <pybind11/numpy.h>
12 #include <pybind11/pybind11.h>
13 #include <pybind11/stl.h>
14
15 #include <string>
16
17 namespace py = pybind11;
18 namespace executorch {
19 namespace backends {
20 namespace qnn {
CreateQuantizationParamWrapper(const Qnn_QuantizationEncoding_t & encoding,py::dict & quant_info)21 std::unique_ptr<QuantizeParamsWrapper> CreateQuantizationParamWrapper(
22 const Qnn_QuantizationEncoding_t& encoding,
23 py::dict& quant_info) {
24 std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper;
25 if (encoding == QNN_QUANTIZATION_ENCODING_UNDEFINED) {
26 quantize_param_wrapper = std::make_unique<UndefinedQuantizeParamsWrapper>();
27 } else if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
28 int32_t axis = quant_info["axis"].cast<int32_t>();
29 std::vector<Qnn_ScaleOffset_t> scale_offset =
30 quant_info["scale_offset"].cast<std::vector<Qnn_ScaleOffset_t>>();
31
32 quantize_param_wrapper =
33 std::make_unique<AxisScaleOffsetQuantizeParamsWrapper>(
34 axis, scale_offset);
35 } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) {
36 uint32_t bitwidth = quant_info["bitwidth"].cast<uint32_t>();
37 int32_t axis = quant_info["axis"].cast<int32_t>();
38 std::vector<Qnn_ScaleOffset_t> scale_offset =
39 quant_info["scale_offset"].cast<std::vector<Qnn_ScaleOffset_t>>();
40 uint32_t num_elements = scale_offset.size();
41 std::vector<float> scales;
42 std::vector<int32_t> offsets;
43 for (const auto& scale_offset : scale_offset) {
44 scales.push_back(scale_offset.scale);
45 offsets.push_back(scale_offset.offset);
46 }
47 quantize_param_wrapper =
48 std::make_unique<BwAxisScaleOffsetQuantizeParamsWrapper>(
49 bitwidth, axis, num_elements, scales, offsets);
50 } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET) {
51 uint32_t bitwidth = quant_info["bitwidth"].cast<uint32_t>();
52 float scale = quant_info["scale"].cast<float>();
53 int32_t offset = quant_info["offset"].cast<int32_t>();
54 quantize_param_wrapper =
55 std::make_unique<BwScaleOffsetQuantizeParamsWrapper>(
56 bitwidth, scale, offset);
57 } else if (encoding == QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
58 float scale = quant_info["scale"].cast<float>();
59 int32_t offset = quant_info["offset"].cast<int32_t>();
60 quantize_param_wrapper =
61 std::make_unique<ScaleOffsetQuantizeParamsWrapper>(scale, offset);
62 } else {
63 QNN_EXECUTORCH_LOG_ERROR(
64 "Unknown the encoding of quantization: %d", encoding);
65 }
66 return quantize_param_wrapper;
67 }
68
CreateTensorWrapper(const std::string & tensor_name,Qnn_TensorType_t tensor_type,Qnn_DataType_t data_type,const Qnn_QuantizationEncoding_t & encoding,py::dict & quant_info,std::uint32_t rank,const std::vector<uint32_t> & dims,py::array & data,bool copy_data)69 std::shared_ptr<TensorWrapper> CreateTensorWrapper(
70 const std::string& tensor_name,
71 Qnn_TensorType_t tensor_type,
72 Qnn_DataType_t data_type,
73 const Qnn_QuantizationEncoding_t& encoding,
74 py::dict& quant_info,
75 std::uint32_t rank,
76 const std::vector<uint32_t>& dims,
77 py::array& data,
78 bool copy_data) {
79 std::unique_ptr<QuantizeParamsWrapper> quantize_param_wrapper =
80 CreateQuantizationParamWrapper(encoding, quant_info);
81
82 if (data.size() == 0) {
83 return CreateTensorWrapper(
84 tensor_name,
85 tensor_type,
86 data_type,
87 std::move(quantize_param_wrapper),
88 rank,
89 dims.data(),
90 0,
91 nullptr,
92 copy_data);
93 }
94 return CreateTensorWrapper(
95 tensor_name,
96 tensor_type,
97 data_type,
98 std::move(quantize_param_wrapper),
99 rank,
100 dims.data(),
101 0,
102 data.data(),
103 copy_data);
104 }
105
// Module entry point: registers the QNN wrapper types, enums, and helper
// factories under the Python module `PyQnnWrapperAdaptor`.
PYBIND11_MODULE(PyQnnWrapperAdaptor, m) {
  // Let numpy understand EncodingData as a structured dtype with
  // (scale, offset) fields, so GetEncodings can return it as an array.
  PYBIND11_NUMPY_DTYPE(PyQnnTensorWrapper::EncodingData, scale, offset);

  // QNN tensor-type enum; export_values() also puts the enumerators at
  // module scope (Python can write either Qnn_TensorType_t.X or X).
  py::enum_<Qnn_TensorType_t>(m, "Qnn_TensorType_t")
      .value(
          "QNN_TENSOR_TYPE_APP_WRITE",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_APP_WRITE)
      .value(
          "QNN_TENSOR_TYPE_APP_READ",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_APP_READ)
      .value(
          "QNN_TENSOR_TYPE_APP_READWRITE",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_APP_READWRITE)
      .value("QNN_TENSOR_TYPE_NATIVE", Qnn_TensorType_t::QNN_TENSOR_TYPE_NATIVE)
      .value("QNN_TENSOR_TYPE_STATIC", Qnn_TensorType_t::QNN_TENSOR_TYPE_STATIC)
      .value("QNN_TENSOR_TYPE_NULL", Qnn_TensorType_t::QNN_TENSOR_TYPE_NULL)
      .value(
          "QNN_TENSOR_TYPE_UNDEFINED",
          Qnn_TensorType_t::QNN_TENSOR_TYPE_UNDEFINED)
      .export_values();

  // QNN element data-type enum (signed/unsigned ints, floats, fixed-point,
  // bool, undefined).
  py::enum_<Qnn_DataType_t>(m, "Qnn_DataType_t")
      .value("QNN_DATATYPE_INT_8", Qnn_DataType_t::QNN_DATATYPE_INT_8)
      .value("QNN_DATATYPE_INT_16", Qnn_DataType_t::QNN_DATATYPE_INT_16)
      .value("QNN_DATATYPE_INT_32", Qnn_DataType_t::QNN_DATATYPE_INT_32)
      .value("QNN_DATATYPE_INT_64", Qnn_DataType_t::QNN_DATATYPE_INT_64)
      .value("QNN_DATATYPE_UINT_8", Qnn_DataType_t::QNN_DATATYPE_UINT_8)
      .value("QNN_DATATYPE_UINT_16", Qnn_DataType_t::QNN_DATATYPE_UINT_16)
      .value("QNN_DATATYPE_UINT_32", Qnn_DataType_t::QNN_DATATYPE_UINT_32)
      .value("QNN_DATATYPE_UINT_64", Qnn_DataType_t::QNN_DATATYPE_UINT_64)
      .value("QNN_DATATYPE_FLOAT_16", Qnn_DataType_t::QNN_DATATYPE_FLOAT_16)
      .value("QNN_DATATYPE_FLOAT_32", Qnn_DataType_t::QNN_DATATYPE_FLOAT_32)
      .value(
          "QNN_DATATYPE_SFIXED_POINT_8",
          Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_8)
      .value(
          "QNN_DATATYPE_SFIXED_POINT_16",
          Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_16)
      .value(
          "QNN_DATATYPE_SFIXED_POINT_32",
          Qnn_DataType_t::QNN_DATATYPE_SFIXED_POINT_32)
      .value(
          "QNN_DATATYPE_UFIXED_POINT_8",
          Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_8)
      .value(
          "QNN_DATATYPE_UFIXED_POINT_16",
          Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16)
      .value(
          "QNN_DATATYPE_UFIXED_POINT_32",
          Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_32)
      .value("QNN_DATATYPE_BOOL_8", Qnn_DataType_t::QNN_DATATYPE_BOOL_8)
      .value("QNN_DATATYPE_UNDEFINED", Qnn_DataType_t::QNN_DATATYPE_UNDEFINED)
      .export_values();

  // Quantization-encoding enum; each value maps to one branch of
  // CreateQuantizationParamWrapper above.
  py::enum_<Qnn_QuantizationEncoding_t>(m, "Qnn_QuantizationEncoding_t")
      .value(
          "QNN_QUANTIZATION_ENCODING_UNDEFINED",
          Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_UNDEFINED)
      .value(
          "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_SCALE_OFFSET)
      .value(
          "QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::
              QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET)
      .value(
          "QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET)
      .value(
          "QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET",
          Qnn_QuantizationEncoding_t::
              QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET)
      .export_values();
  // OpWrapper: constructible from (name, package_name, op_type) strings —
  // NOTE(review): argument meanings inferred from PyQnnOpWrapper below;
  // confirm against OpWrapper's declaration.
  py::class_<OpWrapper, std::shared_ptr<OpWrapper>>(m, "OpWrapper")
      .def(py::init<
           const std::string&,
           const std::string&,
           const std::string&>());

  // TensorWrapper: bound via the py::array-accepting CreateTensorWrapper
  // overload defined above (overload_cast picks it among the C++ overloads).
  py::class_<TensorWrapper, std::shared_ptr<TensorWrapper>>(m, "TensorWrapper")
      .def(py::init(py::overload_cast<
                    const std::string&,
                    Qnn_TensorType_t,
                    Qnn_DataType_t,
                    const Qnn_QuantizationEncoding_t&,
                    py::dict&,
                    std::uint32_t,
                    const std::vector<uint32_t>&,
                    py::array&,
                    bool>(&CreateTensorWrapper)));

  // Opaque base type; exposed only so it can appear in signatures.
  py::class_<QuantizeParamsWrapper>(m, "QuantizeParamsWrapper");

  // Plain (scale, offset) pair used inside quant_info["scale_offset"] lists.
  py::class_<Qnn_ScaleOffset_t>(m, "Qnn_ScaleOffset_t")
      .def(py::init<float, int32_t>());

  // Python-side op builder: attach tensors and parameters, then retrieve the
  // underlying OpWrapper.
  py::class_<PyQnnOpWrapper, std::shared_ptr<PyQnnOpWrapper>>(
      m, "PyQnnOpWrapper")
      .def(py::init<
           const std::string&,
           const std::string&,
           const std::string&>())
      .def(
          "AddInputTensors",
          &PyQnnOpWrapper::AddInputTensors,
          "A function which add input tensor wrapper into op wrapper",
          py::arg("tensors"))
      .def(
          "AddOutputTensors",
          &PyQnnOpWrapper::AddOutputTensors,
          "A function which add output tensor wrapper into op wrapper",
          py::arg("tensors"))
      .def(
          "AddTensorParam",
          &PyQnnOpWrapper::AddTensorParam,
          "A function which add tensor parameter into op wrapper",
          py::arg("name"),
          py::arg("data_type"),
          py::arg("rank"),
          py::arg("dims"),
          py::arg("data"),
          py::arg("copy_data"))
      .def(
          "AddScalarParam",
          &PyQnnOpWrapper::AddScalarParam,
          "A function which add scalar parameter into op wrapper",
          py::arg("name"),
          py::arg("data_type"),
          py::arg("attrData"))
      .def(
          "GetOpWrapper",
          &PyQnnOpWrapper::GetOpWrapper,
          "A function which get op wrapper");

  // Read-only view of per-axis quantization encodings returned by
  // PyQnnTensorWrapper::GetEncodings.
  py::class_<PyQnnTensorWrapper::Encoding>(m, "Encoding")
      .def_readonly("data", &PyQnnTensorWrapper::Encoding::data)
      .def_readonly("axis", &PyQnnTensorWrapper::Encoding::axis)

  // Python-side inspector for an existing TensorWrapper (dims, dtype, name,
  // quantization encodings).
  py::class_<PyQnnTensorWrapper, std::shared_ptr<PyQnnTensorWrapper>>(
      m, "PyQnnTensorWrapper")
      .def(py::init<const std::shared_ptr<TensorWrapper>&>())
      .def("GetDims", &PyQnnTensorWrapper::GetDims)
      .def("GetDataType", &PyQnnTensorWrapper::GetDataType)
      .def("GetName", &PyQnnTensorWrapper::GetName)
      .def("GetEncodings", &PyQnnTensorWrapper::GetEncodings);
}
252 } // namespace qnn
253 } // namespace backends
254 } // namespace executorch
255