/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include <executorch/backends/vulkan/runtime/api/api.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>

#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
  vkcompute::api::vTensor( \
      vkcompute::api::context(), \
      sizes, \
      vkapi::kFloat, \
      utils::StorageType::TEXTURE_3D, \
      utils::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, \
      allocate_memory);

#define CREATE_FLOAT_BUFFER(sizes, allocate_memory) \
  vkcompute::api::vTensor( \
      vkcompute::api::context(), \
      sizes, \
      vkapi::kFloat, \
      utils::StorageType::BUFFER, \
      utils::GPUMemoryLayout::TENSOR_WIDTH_PACKED, \
      allocate_memory);
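
// Usage sketch (not part of the test API): both macros expand to a vTensor
// constructor call, so they are normally used as initializers. The sizes
// below are hypothetical, and the unqualified vkapi::/utils:: names in the
// macros assume a translation unit with `using namespace vkcompute;` in
// scope.
//
//   std::vector<int64_t> sizes = {2, 4, 8, 8};
//   vkcompute::api::vTensor tex = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory=*/true);
//   vkcompute::api::vTensor buf = CREATE_FLOAT_BUFFER(sizes, /*allocate_memory=*/false);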

#define DEFINE_STAGING_BUFFER_AND_RECORD_TO_GPU_FOR(tensor) \
  vkcompute::api::StagingBuffer staging_buffer_##tensor( \
      vkcompute::api::context(), \
      vkapi::kFloat, \
      tensor.staging_buffer_numel()); \
  record_nchw_to_image_op( \
      vkcompute::api::context(), staging_buffer_##tensor.buffer(), tensor);

#define DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(tensor) \
  vkcompute::api::StagingBuffer staging_buffer_##tensor( \
      vkcompute::api::context(), \
      vkapi::kFloat, \
      tensor.staging_buffer_numel()); \
  record_image_to_nchw_op( \
      vkcompute::api::context(), tensor, staging_buffer_##tensor.buffer());
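
// Usage sketch: the first macro declares `staging_buffer_a` and records an
// upload of its contents into `a`; the second declares `staging_buffer_out`
// and records a readback from `out`. The recorded copies only run once the
// command buffer is submitted, so staging data can be filled after recording
// but before submit_to_gpu(). `a`, `out`, and `expected_val` are hypothetical.
//
//   DEFINE_STAGING_BUFFER_AND_RECORD_TO_GPU_FOR(a)
//   fill_staging(staging_buffer_a, 2.5f);
//   // ... record compute work that reads `a` and writes `out` ...
//   DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(out)
//   submit_to_gpu();
//   check_staging_buffer(staging_buffer_out, expected_val);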

#define CHECK_VALUE(data, idx, expected) \
  do { \
    if (data[idx] != expected) { \
      std::cout << "Output at [" << idx << "] = " << data[idx] \
                << ", does not match expected value " << expected \
                << std::endl; \
    } \
    ASSERT_TRUE(data[idx] == expected); \
  } while (false)
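
// Usage sketch: check every element of a readback buffer against a constant;
// the macro prints a diagnostic before the gtest assertion fires. The vector
// and tensor names are hypothetical.
//
//   std::vector<float> data = extract_vtensor(some_tensor);
//   for (size_t i = 0; i < data.size(); i++) {
//     CHECK_VALUE(data, i, 3.0f);
//   }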

//
// Operator Recording
//

void record_nchw_to_buffer_op(
    vkcompute::api::Context* const context,
    vkcompute::vkapi::VulkanBuffer& src_buffer,
    vkcompute::api::vTensor& v_dst);

void record_buffer_to_nchw_op(
    vkcompute::api::Context* const context,
    vkcompute::api::vTensor& v_src,
    vkcompute::vkapi::VulkanBuffer& dst_buffer);

void record_nchw_to_image_op(
    vkcompute::api::Context* const context,
    vkcompute::vkapi::VulkanBuffer& src_buffer,
    vkcompute::api::vTensor& v_dst);

void record_image_to_nchw_op(
    vkcompute::api::Context* const context,
    vkcompute::api::vTensor& v_src,
    vkcompute::vkapi::VulkanBuffer& dst_buffer);

void record_bitw8_image_to_nchw_nobitw8buffer_op(
    vkcompute::api::Context* const context,
    vkcompute::api::vTensor& v_src,
    vkcompute::api::StagingBuffer& dst_buffer);

void record_conv2d_prepack_weights_op(
    vkcompute::api::Context* const context,
    vkcompute::vkapi::VulkanBuffer& src_buffer,
    vkcompute::api::vTensor& v_dst,
    const std::vector<int64_t>& original_sizes,
    const bool transposed);

void record_binary_op(
    vkcompute::api::Context* const context,
    const std::string& op_name,
    vkcompute::api::vTensor& v_in1,
    vkcompute::api::vTensor& v_in2,
    vkcompute::api::vTensor& v_dst);

void execute_and_check_add(
    vkcompute::api::vTensor& a,
    vkcompute::api::vTensor& b,
    vkcompute::api::vTensor& c,
    float a_val,
    float b_val);

void record_index_fill_buffer(
    vkcompute::api::Context* const context,
    vkcompute::api::vTensor& v_ten);

void record_scalar_add_buffer(
    vkcompute::api::Context* context,
    vkcompute::api::vTensor& v_ten,
    float offset);

void record_reference_matmul(
    vkcompute::api::Context* context,
    vkcompute::api::vTensor& out,
    vkcompute::api::vTensor& mat1,
    vkcompute::api::vTensor& mat2);

void record_matmul_texture3d(
    vkcompute::api::Context* context,
    vkcompute::api::vTensor& out,
    vkcompute::api::vTensor& mat1,
    vkcompute::api::vTensor& mat2);
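
// Usage sketch: a typical single-op test built from the helpers above
// (hypothetical sizes and values; assumes `using namespace vkcompute;` so the
// CREATE_* macros resolve). Inputs are filled, the op is recorded, the
// command buffer is submitted, and the output is read back for CPU checks.
//
//   std::vector<int64_t> sizes = {4, 4, 4};
//   api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory=*/true);
//   api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory=*/true);
//   api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory=*/true);
//
//   fill_vtensor(a, 2.0f);
//   fill_vtensor(b, 3.0f);
//   record_binary_op(api::context(), "add", a, b, c);
//
//   DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(c)
//   submit_to_gpu();
//   check_staging_buffer(staging_buffer_c, 5.0f);
//
// execute_and_check_add() wraps essentially this flow for pre-created
// tensors.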

//
// Input & Output Utilities
//

inline void fill_staging(
    vkcompute::api::StagingBuffer& staging,
    float val,
    int numel = -1) {
  if (numel < 0) {
    numel = staging.numel();
  }
  std::vector<float> data(numel);
  std::fill(data.begin(), data.end(), val);
  staging.copy_from(data.data(), sizeof(float) * numel);
}
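
// Usage sketch: fill an entire staging buffer with a constant, or only the
// first N elements by passing an explicit numel (buffer name hypothetical).
//
//   fill_staging(staging_buffer_a, 1.5f);
//   fill_staging(staging_buffer_a, 0.0f, /*numel=*/16);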

void fill_vtensor(vkcompute::api::vTensor& vten, std::vector<float>& data);

void fill_vtensor(vkcompute::api::vTensor& vten, float val, bool iota = false);

std::vector<float> create_random_float_buffer(
    const size_t numel,
    const float min = 0,
    const float max = 1);

std::vector<uint8_t> create_random_uint8_buffer(
    const size_t numel,
    const uint8_t min = 0,
    const uint8_t max = 255);

void fill_vtensor(
    vkcompute::ComputeGraph& graph,
    const vkcompute::IOValueRef idx,
    float val,
    bool iota = false);

void extract_vtensor(vkcompute::api::vTensor& vten, std::vector<float>& data);

inline std::vector<float> extract_vtensor(vkcompute::api::vTensor& vten) {
  std::vector<float> data_out(vten.staging_buffer_numel());
  extract_vtensor(vten, data_out);
  return data_out;
}
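
// Usage sketch: write a constant into a tensor and read it back for CPU-side
// checks; the fill/extract helpers handle the staging copies internally
// (tensor name hypothetical).
//
//   fill_vtensor(my_tensor, 1.0f);
//   std::vector<float> out = extract_vtensor(my_tensor);
//   for (size_t i = 0; i < out.size(); i++) {
//     CHECK_VALUE(out, i, 1.0f);
//   }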

inline void check_staging_buffer(
    vkcompute::api::StagingBuffer& staging,
    float val,
    int numel = -1) {
  if (numel < 0) {
    numel = staging.numel();
  }
  std::vector<float> data(numel);
  staging.copy_to(data.data(), sizeof(float) * numel);

  for (size_t i = 0; i < data.size(); ++i) {
    CHECK_VALUE(data, i, val);
  }
}

inline int64_t get_buf_idx(
    vkcompute::ComputeGraph& graph,
    vkcompute::IOValueRef ref,
    const std::vector<int64_t>& tensor_coor) {
  vkcompute::vTensorPtr vten_ptr = graph.get_tensor(ref.value);

  const std::vector<int64_t>& sizes = vten_ptr->sizes();

  int64_t c = vkcompute::dim_at<vkcompute::kChannel4D>(sizes);
  int64_t h = vkcompute::dim_at<vkcompute::kHeight4D>(sizes);
  int64_t w = vkcompute::dim_at<vkcompute::kWidth4D>(sizes);

  int64_t ni = vkcompute::dim_at<vkcompute::kBatch4D>(tensor_coor);
  int64_t ci = vkcompute::dim_at<vkcompute::kChannel4D>(tensor_coor);
  int64_t hi = vkcompute::dim_at<vkcompute::kHeight4D>(tensor_coor);
  int64_t wi = vkcompute::dim_at<vkcompute::kWidth4D>(tensor_coor);

  return (ni * c * h * w + ci * h * w + hi * w + wi);
}
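
// Usage sketch: convert an (N, C, H, W) coordinate into a flat NCHW index so
// that a single element of data read back from a graph input/output staging
// buffer can be checked. The graph, ref, and data names below are
// hypothetical.
//
//   int64_t idx = get_buf_idx(graph, out_ref, {0, 1, 2, 3});
//   CHECK_VALUE(output_data, idx, expected_val);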

//
// Context Management
//

void submit_to_gpu();

vkcompute::vkapi::Allocation allocate_memory_for(
    const vkcompute::api::vTensor& vten);

VmaTotalStatistics get_vma_stats();

size_t get_vma_allocation_count();
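
// Usage sketch (assumed behavior; see the definitions in the accompanying
// .cpp): deferred-allocation tests create a tensor without backing memory,
// allocate it explicitly, and compare VMA allocation counts before and after.
// The sizes vector is hypothetical.
//
//   size_t count_before = get_vma_allocation_count();
//   vkcompute::api::vTensor t = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory=*/false);
//   vkcompute::vkapi::Allocation alloc = allocate_memory_for(t);
//   size_t count_after = get_vma_allocation_count();  // typically count_before + 1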

//
// Graph Test Utilities
//

void execute_graph_and_check_output(
    vkcompute::ComputeGraph& graph,
    std::vector<float> input_vals,
    std::vector<float> expected_outputs);
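
// Usage sketch (an assumption about the value layout; see the definition in
// the accompanying .cpp): for an already-built ComputeGraph, pass one fill
// value per graph input and one expected value per graph output.
//
//   execute_graph_and_check_output(graph, {2.0f, 3.0f}, {5.0f});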

//
// Debugging Utilities
//

#define PRINT_DATA(vec) \
  do { \
    std::cout << #vec << ": "; \
    print_vector(vec); \
  } while (false);

#define PRINT_DATA_RANGE(vec, start, range) \
  do { \
    std::cout << #vec << "[" << start << ", " << (start + range) << "]: "; \
    print_vector(vec, start, range); \
  } while (false);

template <typename T>
void print_vector(
    const std::vector<T>& data,
    size_t start = 0,
    size_t range = 20) {
  size_t end = data.size();
  if (range >= 1) {
    end = std::min(data.size(), start + range);
  }
  for (size_t i = start; i < end; ++i) {
    std::cout << data.at(i) << ", ";
  }
  std::cout << std::endl;
}
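
// Usage sketch: PRINT_DATA prints the first 20 elements of a vector (the
// default range of print_vector), labelled with the variable name;
// PRINT_DATA_RANGE prints an explicit sub-range. The tensor name is
// hypothetical.
//
//   std::vector<float> out = extract_vtensor(my_tensor);
//   PRINT_DATA(out);
//   PRINT_DATA_RANGE(out, 8, 4);  // prints out[8..11]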

//
// Misc. Utilities
//

bool check_close(float a, float b, float atol = 1e-4, float rtol = 1e-5);
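
// Usage sketch: element-wise comparison with absolute and relative tolerances
// for floating point results (the exact tolerance formula lives in the
// definition in the accompanying .cpp; variable names are hypothetical).
//
//   ASSERT_TRUE(check_close(out_data[i], ref_data[i]));
//   ASSERT_TRUE(check_close(sum, expected_sum, /*atol=*/1e-3, /*rtol=*/1e-4));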