xref: /aosp_15_r20/external/OpenCL-CTS/test_common/harness/kernelHelpers.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi //    http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include "crc32.h"
17*6467f958SSadaf Ebrahimi #include "kernelHelpers.h"
18*6467f958SSadaf Ebrahimi #include "deviceInfo.h"
19*6467f958SSadaf Ebrahimi #include "errorHelpers.h"
20*6467f958SSadaf Ebrahimi #include "imageHelpers.h"
21*6467f958SSadaf Ebrahimi #include "typeWrappers.h"
22*6467f958SSadaf Ebrahimi #include "testHarness.h"
23*6467f958SSadaf Ebrahimi #include "parseParameters.h"
24*6467f958SSadaf Ebrahimi 
25*6467f958SSadaf Ebrahimi #include <cassert>
26*6467f958SSadaf Ebrahimi #include <vector>
27*6467f958SSadaf Ebrahimi #include <string>
28*6467f958SSadaf Ebrahimi #include <fstream>
29*6467f958SSadaf Ebrahimi #include <sstream>
30*6467f958SSadaf Ebrahimi #include <iomanip>
31*6467f958SSadaf Ebrahimi #include <mutex>
32*6467f958SSadaf Ebrahimi #include <algorithm>
33*6467f958SSadaf Ebrahimi 
// Directory separator for the host platform; the path-building helpers in
// this file insert it between a directory and a file name.
#ifdef _WIN32
std::string slash("\\");
#else
std::string slash("/");
#endif
39*6467f958SSadaf Ebrahimi 
40*6467f958SSadaf Ebrahimi static std::mutex gCompilerMutex;
41*6467f958SSadaf Ebrahimi 
42*6467f958SSadaf Ebrahimi static cl_int get_first_device_id(const cl_context context,
43*6467f958SSadaf Ebrahimi                                   cl_device_id &device);
44*6467f958SSadaf Ebrahimi 
/**
 * Returns the size, in bytes, of the file named by fileName.
 *
 * The file is opened in binary mode and the size is taken from the stream
 * position after seeking to the end. Returns 0 when the file cannot be
 * opened. If the position query itself fails, the stream's failure position
 * is returned converted to long.
 */
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
54*6467f958SSadaf Ebrahimi 
/**
 * Concatenates the first numKernelLines C strings of kernelProgram, in
 * order, into a single string. No separator is inserted between pieces.
 * Every one of those entries must be a valid NUL-terminated string.
 */
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string joined;
    for (unsigned int index = 0; index != numKernelLines; ++index)
    {
        joined.append(kernelProgram[index]);
    }
    return joined;
}
67*6467f958SSadaf Ebrahimi 
get_kernel_name(const std::string & source)68*6467f958SSadaf Ebrahimi std::string get_kernel_name(const std::string &source)
69*6467f958SSadaf Ebrahimi {
70*6467f958SSadaf Ebrahimi     // Create list of kernel names
71*6467f958SSadaf Ebrahimi     std::string kernelsList;
72*6467f958SSadaf Ebrahimi     size_t kPos = source.find("kernel");
73*6467f958SSadaf Ebrahimi     while (kPos != std::string::npos)
74*6467f958SSadaf Ebrahimi     {
75*6467f958SSadaf Ebrahimi         // check for '__kernel'
76*6467f958SSadaf Ebrahimi         size_t pos = kPos;
77*6467f958SSadaf Ebrahimi         if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
78*6467f958SSadaf Ebrahimi             pos -= 2;
79*6467f958SSadaf Ebrahimi 
80*6467f958SSadaf Ebrahimi         // check character before 'kernel' (white space expected)
81*6467f958SSadaf Ebrahimi         size_t wsPos = source.find_last_of(" \t\r\n", pos);
82*6467f958SSadaf Ebrahimi         if (wsPos == std::string::npos || wsPos + 1 == pos)
83*6467f958SSadaf Ebrahimi         {
84*6467f958SSadaf Ebrahimi             // check character after 'kernel' (white space expected)
85*6467f958SSadaf Ebrahimi             size_t akPos = kPos + sizeof("kernel") - 1;
86*6467f958SSadaf Ebrahimi             wsPos = source.find_first_of(" \t\r\n", akPos);
87*6467f958SSadaf Ebrahimi             if (!(wsPos == akPos))
88*6467f958SSadaf Ebrahimi             {
89*6467f958SSadaf Ebrahimi                 kPos = source.find("kernel", kPos + 1);
90*6467f958SSadaf Ebrahimi                 continue;
91*6467f958SSadaf Ebrahimi             }
92*6467f958SSadaf Ebrahimi 
93*6467f958SSadaf Ebrahimi             bool attributeFound;
94*6467f958SSadaf Ebrahimi             do
95*6467f958SSadaf Ebrahimi             {
96*6467f958SSadaf Ebrahimi                 attributeFound = false;
97*6467f958SSadaf Ebrahimi                 // find '(' after kernel name name
98*6467f958SSadaf Ebrahimi                 size_t pPos = source.find("(", akPos);
99*6467f958SSadaf Ebrahimi                 if (!(pPos != std::string::npos)) continue;
100*6467f958SSadaf Ebrahimi 
101*6467f958SSadaf Ebrahimi                 // check for not empty kernel name before '('
102*6467f958SSadaf Ebrahimi                 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
103*6467f958SSadaf Ebrahimi                 if (!(pos != std::string::npos && pos > akPos)) continue;
104*6467f958SSadaf Ebrahimi 
105*6467f958SSadaf Ebrahimi                 // find character before kernel name
106*6467f958SSadaf Ebrahimi                 wsPos = source.find_last_of(" \t\r\n", pos);
107*6467f958SSadaf Ebrahimi                 if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
108*6467f958SSadaf Ebrahimi 
109*6467f958SSadaf Ebrahimi                 std::string name =
110*6467f958SSadaf Ebrahimi                     source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
111*6467f958SSadaf Ebrahimi                 // check for kernel attribute
112*6467f958SSadaf Ebrahimi                 if (name == "__attribute__")
113*6467f958SSadaf Ebrahimi                 {
114*6467f958SSadaf Ebrahimi                     attributeFound = true;
115*6467f958SSadaf Ebrahimi                     int pCount = 1;
116*6467f958SSadaf Ebrahimi                     akPos = pPos + 1;
117*6467f958SSadaf Ebrahimi                     while (pCount > 0 && akPos != std::string::npos)
118*6467f958SSadaf Ebrahimi                     {
119*6467f958SSadaf Ebrahimi                         akPos = source.find_first_of("()", akPos + 1);
120*6467f958SSadaf Ebrahimi                         if (akPos != std::string::npos)
121*6467f958SSadaf Ebrahimi                         {
122*6467f958SSadaf Ebrahimi                             if (source[akPos] == '(')
123*6467f958SSadaf Ebrahimi                                 pCount++;
124*6467f958SSadaf Ebrahimi                             else
125*6467f958SSadaf Ebrahimi                                 pCount--;
126*6467f958SSadaf Ebrahimi                         }
127*6467f958SSadaf Ebrahimi                     }
128*6467f958SSadaf Ebrahimi                 }
129*6467f958SSadaf Ebrahimi                 else
130*6467f958SSadaf Ebrahimi                 {
131*6467f958SSadaf Ebrahimi                     kernelsList += name + ".";
132*6467f958SSadaf Ebrahimi                 }
133*6467f958SSadaf Ebrahimi             } while (attributeFound);
134*6467f958SSadaf Ebrahimi         }
135*6467f958SSadaf Ebrahimi         kPos = source.find("kernel", kPos + 1);
136*6467f958SSadaf Ebrahimi     }
137*6467f958SSadaf Ebrahimi     std::ostringstream oss;
138*6467f958SSadaf Ebrahimi     if (MAX_LEN_FOR_KERNEL_LIST > 0)
139*6467f958SSadaf Ebrahimi     {
140*6467f958SSadaf Ebrahimi         if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
141*6467f958SSadaf Ebrahimi         {
142*6467f958SSadaf Ebrahimi             kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
143*6467f958SSadaf Ebrahimi             kernelsList[kernelsList.size() - 1] = '.';
144*6467f958SSadaf Ebrahimi             kernelsList[kernelsList.size() - 1] = '.';
145*6467f958SSadaf Ebrahimi         }
146*6467f958SSadaf Ebrahimi         oss << kernelsList;
147*6467f958SSadaf Ebrahimi     }
148*6467f958SSadaf Ebrahimi     return oss.str();
149*6467f958SSadaf Ebrahimi }
150*6467f958SSadaf Ebrahimi 
151*6467f958SSadaf Ebrahimi static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)152*6467f958SSadaf Ebrahimi get_offline_compilation_file_type_str(const CompilationMode compilationMode)
153*6467f958SSadaf Ebrahimi {
154*6467f958SSadaf Ebrahimi     switch (compilationMode)
155*6467f958SSadaf Ebrahimi     {
156*6467f958SSadaf Ebrahimi         default: assert(0 && "Invalid compilation mode"); abort();
157*6467f958SSadaf Ebrahimi         case kOnline:
158*6467f958SSadaf Ebrahimi             assert(0 && "Invalid compilation mode for offline compilation");
159*6467f958SSadaf Ebrahimi             abort();
160*6467f958SSadaf Ebrahimi         case kBinary: return "binary";
161*6467f958SSadaf Ebrahimi         case kSpir_v: return "SPIR-V";
162*6467f958SSadaf Ebrahimi     }
163*6467f958SSadaf Ebrahimi }
164*6467f958SSadaf Ebrahimi 
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165*6467f958SSadaf Ebrahimi static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166*6467f958SSadaf Ebrahimi                                               const char *const *kernelProgram,
167*6467f958SSadaf Ebrahimi                                               const char *buildOptions)
168*6467f958SSadaf Ebrahimi {
169*6467f958SSadaf Ebrahimi     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170*6467f958SSadaf Ebrahimi     std::string kernelName = get_kernel_name(kernel);
171*6467f958SSadaf Ebrahimi     cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172*6467f958SSadaf Ebrahimi     std::ostringstream oss;
173*6467f958SSadaf Ebrahimi     oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
174*6467f958SSadaf Ebrahimi         << kernelCrc;
175*6467f958SSadaf Ebrahimi     if (buildOptions)
176*6467f958SSadaf Ebrahimi     {
177*6467f958SSadaf Ebrahimi         cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
178*6467f958SSadaf Ebrahimi         oss << '.' << std::hex << std::setfill('0') << std::setw(8)
179*6467f958SSadaf Ebrahimi             << bOptionsCrc;
180*6467f958SSadaf Ebrahimi     }
181*6467f958SSadaf Ebrahimi     return oss.str();
182*6467f958SSadaf Ebrahimi }
183*6467f958SSadaf Ebrahimi 
184*6467f958SSadaf Ebrahimi 
185*6467f958SSadaf Ebrahimi static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)186*6467f958SSadaf Ebrahimi get_cl_build_options_filename_with_path(const std::string &filePath,
187*6467f958SSadaf Ebrahimi                                         const std::string &fileNamePrefix)
188*6467f958SSadaf Ebrahimi {
189*6467f958SSadaf Ebrahimi     return filePath + slash + fileNamePrefix + ".options";
190*6467f958SSadaf Ebrahimi }
191*6467f958SSadaf Ebrahimi 
192*6467f958SSadaf Ebrahimi static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)193*6467f958SSadaf Ebrahimi get_cl_source_filename_with_path(const std::string &filePath,
194*6467f958SSadaf Ebrahimi                                  const std::string &fileNamePrefix)
195*6467f958SSadaf Ebrahimi {
196*6467f958SSadaf Ebrahimi     return filePath + slash + fileNamePrefix + ".cl";
197*6467f958SSadaf Ebrahimi }
198*6467f958SSadaf Ebrahimi 
199*6467f958SSadaf Ebrahimi static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)200*6467f958SSadaf Ebrahimi get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
201*6467f958SSadaf Ebrahimi                               const std::string &filePath,
202*6467f958SSadaf Ebrahimi                               const std::string &fileNamePrefix)
203*6467f958SSadaf Ebrahimi {
204*6467f958SSadaf Ebrahimi     std::string binaryFilename = filePath + slash + fileNamePrefix;
205*6467f958SSadaf Ebrahimi     if (kSpir_v == mode)
206*6467f958SSadaf Ebrahimi     {
207*6467f958SSadaf Ebrahimi         std::ostringstream extension;
208*6467f958SSadaf Ebrahimi         extension << ".spv" << deviceAddrSpaceSize;
209*6467f958SSadaf Ebrahimi         binaryFilename += extension.str();
210*6467f958SSadaf Ebrahimi     }
211*6467f958SSadaf Ebrahimi     return binaryFilename;
212*6467f958SSadaf Ebrahimi }
213*6467f958SSadaf Ebrahimi 
file_exist_on_disk(const std::string & filePath,const std::string & fileName)214*6467f958SSadaf Ebrahimi static bool file_exist_on_disk(const std::string &filePath,
215*6467f958SSadaf Ebrahimi                                const std::string &fileName)
216*6467f958SSadaf Ebrahimi {
217*6467f958SSadaf Ebrahimi     std::string fileNameWithPath = filePath + slash + fileName;
218*6467f958SSadaf Ebrahimi     bool exist = false;
219*6467f958SSadaf Ebrahimi     std::ifstream ifs;
220*6467f958SSadaf Ebrahimi 
221*6467f958SSadaf Ebrahimi     ifs.open(fileNameWithPath.c_str(), std::ios::binary);
222*6467f958SSadaf Ebrahimi     if (ifs.good()) exist = true;
223*6467f958SSadaf Ebrahimi     ifs.close();
224*6467f958SSadaf Ebrahimi     return exist;
225*6467f958SSadaf Ebrahimi }
226*6467f958SSadaf Ebrahimi 
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)227*6467f958SSadaf Ebrahimi static bool should_save_kernel_source_to_disk(CompilationMode mode,
228*6467f958SSadaf Ebrahimi                                               CompilationCacheMode cacheMode,
229*6467f958SSadaf Ebrahimi                                               const std::string &binaryPath,
230*6467f958SSadaf Ebrahimi                                               const std::string &binaryName)
231*6467f958SSadaf Ebrahimi {
232*6467f958SSadaf Ebrahimi     bool saveToDisk = false;
233*6467f958SSadaf Ebrahimi     if (cacheMode == kCacheModeDumpCl
234*6467f958SSadaf Ebrahimi         || (cacheMode == kCacheModeOverwrite && mode != kOnline))
235*6467f958SSadaf Ebrahimi     {
236*6467f958SSadaf Ebrahimi         saveToDisk = true;
237*6467f958SSadaf Ebrahimi     }
238*6467f958SSadaf Ebrahimi     if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
239*6467f958SSadaf Ebrahimi     {
240*6467f958SSadaf Ebrahimi         saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
241*6467f958SSadaf Ebrahimi     }
242*6467f958SSadaf Ebrahimi     return saveToDisk;
243*6467f958SSadaf Ebrahimi }
244*6467f958SSadaf Ebrahimi 
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)245*6467f958SSadaf Ebrahimi static int save_kernel_build_options_to_disk(const std::string &path,
246*6467f958SSadaf Ebrahimi                                              const std::string &prefix,
247*6467f958SSadaf Ebrahimi                                              const char *buildOptions)
248*6467f958SSadaf Ebrahimi {
249*6467f958SSadaf Ebrahimi     std::string filename =
250*6467f958SSadaf Ebrahimi         get_cl_build_options_filename_with_path(path, prefix);
251*6467f958SSadaf Ebrahimi     std::ofstream ofs(filename.c_str(), std::ios::binary);
252*6467f958SSadaf Ebrahimi     if (!ofs.good())
253*6467f958SSadaf Ebrahimi     {
254*6467f958SSadaf Ebrahimi         log_info("Can't save kernel build options: %s\n", filename.c_str());
255*6467f958SSadaf Ebrahimi         return -1;
256*6467f958SSadaf Ebrahimi     }
257*6467f958SSadaf Ebrahimi     ofs.write(buildOptions, strlen(buildOptions));
258*6467f958SSadaf Ebrahimi     ofs.close();
259*6467f958SSadaf Ebrahimi     log_info("Saved kernel build options to file: %s\n", filename.c_str());
260*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
261*6467f958SSadaf Ebrahimi }
262*6467f958SSadaf Ebrahimi 
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)263*6467f958SSadaf Ebrahimi static int save_kernel_source_to_disk(const std::string &path,
264*6467f958SSadaf Ebrahimi                                       const std::string &prefix,
265*6467f958SSadaf Ebrahimi                                       const std::string &source)
266*6467f958SSadaf Ebrahimi {
267*6467f958SSadaf Ebrahimi     std::string filename = get_cl_source_filename_with_path(path, prefix);
268*6467f958SSadaf Ebrahimi     std::ofstream ofs(filename.c_str(), std::ios::binary);
269*6467f958SSadaf Ebrahimi     if (!ofs.good())
270*6467f958SSadaf Ebrahimi     {
271*6467f958SSadaf Ebrahimi         log_info("Can't save kernel source: %s\n", filename.c_str());
272*6467f958SSadaf Ebrahimi         return -1;
273*6467f958SSadaf Ebrahimi     }
274*6467f958SSadaf Ebrahimi     ofs.write(source.c_str(), source.size());
275*6467f958SSadaf Ebrahimi     ofs.close();
276*6467f958SSadaf Ebrahimi     log_info("Saved kernel source to file: %s\n", filename.c_str());
277*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
278*6467f958SSadaf Ebrahimi }
279*6467f958SSadaf Ebrahimi 
280*6467f958SSadaf Ebrahimi static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)281*6467f958SSadaf Ebrahimi save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
282*6467f958SSadaf Ebrahimi                                        const char *const *kernelProgram,
283*6467f958SSadaf Ebrahimi                                        const char *buildOptions)
284*6467f958SSadaf Ebrahimi {
285*6467f958SSadaf Ebrahimi     int error;
286*6467f958SSadaf Ebrahimi 
287*6467f958SSadaf Ebrahimi     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
288*6467f958SSadaf Ebrahimi     std::string kernelNamePrefix =
289*6467f958SSadaf Ebrahimi         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
290*6467f958SSadaf Ebrahimi 
291*6467f958SSadaf Ebrahimi     // save kernel source to disk
292*6467f958SSadaf Ebrahimi     error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
293*6467f958SSadaf Ebrahimi                                        kernel);
294*6467f958SSadaf Ebrahimi 
295*6467f958SSadaf Ebrahimi     // save kernel build options to disk if exists
296*6467f958SSadaf Ebrahimi     if (buildOptions != NULL)
297*6467f958SSadaf Ebrahimi         error |= save_kernel_build_options_to_disk(
298*6467f958SSadaf Ebrahimi             gCompilationCachePath, kernelNamePrefix, buildOptions);
299*6467f958SSadaf Ebrahimi 
300*6467f958SSadaf Ebrahimi     return error;
301*6467f958SSadaf Ebrahimi }
302*6467f958SSadaf Ebrahimi 
303*6467f958SSadaf Ebrahimi static std::string
get_compilation_mode_str(const CompilationMode compilationMode)304*6467f958SSadaf Ebrahimi get_compilation_mode_str(const CompilationMode compilationMode)
305*6467f958SSadaf Ebrahimi {
306*6467f958SSadaf Ebrahimi     switch (compilationMode)
307*6467f958SSadaf Ebrahimi     {
308*6467f958SSadaf Ebrahimi         default: assert(0 && "Invalid compilation mode"); abort();
309*6467f958SSadaf Ebrahimi         case kOnline: return "online";
310*6467f958SSadaf Ebrahimi         case kBinary: return "binary";
311*6467f958SSadaf Ebrahimi         case kSpir_v: return "spir-v";
312*6467f958SSadaf Ebrahimi     }
313*6467f958SSadaf Ebrahimi }
314*6467f958SSadaf Ebrahimi 
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)315*6467f958SSadaf Ebrahimi static cl_int get_cl_device_info_str(const cl_device_id device,
316*6467f958SSadaf Ebrahimi                                      const cl_uint device_address_space_size,
317*6467f958SSadaf Ebrahimi                                      const CompilationMode compilationMode,
318*6467f958SSadaf Ebrahimi                                      std::string &clDeviceInfo)
319*6467f958SSadaf Ebrahimi {
320*6467f958SSadaf Ebrahimi     std::string extensionsString = get_device_extensions_string(device);
321*6467f958SSadaf Ebrahimi     std::string versionString = get_device_version_string(device);
322*6467f958SSadaf Ebrahimi 
323*6467f958SSadaf Ebrahimi     std::ostringstream clDeviceInfoStream;
324*6467f958SSadaf Ebrahimi     std::string file_type =
325*6467f958SSadaf Ebrahimi         get_offline_compilation_file_type_str(compilationMode);
326*6467f958SSadaf Ebrahimi     clDeviceInfoStream << "# OpenCL device info affecting " << file_type
327*6467f958SSadaf Ebrahimi                        << " offline compilation:" << std::endl
328*6467f958SSadaf Ebrahimi                        << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
329*6467f958SSadaf Ebrahimi                        << std::endl
330*6467f958SSadaf Ebrahimi                        << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
331*6467f958SSadaf Ebrahimi                        << std::endl;
332*6467f958SSadaf Ebrahimi     /* We only need the device's supported IL version(s) when compiling IL
333*6467f958SSadaf Ebrahimi      * that will be loaded with clCreateProgramWithIL() */
334*6467f958SSadaf Ebrahimi     if (compilationMode == kSpir_v)
335*6467f958SSadaf Ebrahimi     {
336*6467f958SSadaf Ebrahimi         std::string ilVersionString = get_device_il_version_string(device);
337*6467f958SSadaf Ebrahimi         clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
338*6467f958SSadaf Ebrahimi                            << "\"" << std::endl;
339*6467f958SSadaf Ebrahimi     }
340*6467f958SSadaf Ebrahimi     clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
341*6467f958SSadaf Ebrahimi                        << std::endl;
342*6467f958SSadaf Ebrahimi     clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
343*6467f958SSadaf Ebrahimi                        << (0 == checkForImageSupport(device)) << std::endl;
344*6467f958SSadaf Ebrahimi     clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
345*6467f958SSadaf Ebrahimi                        << "\"" << std::endl;
346*6467f958SSadaf Ebrahimi 
347*6467f958SSadaf Ebrahimi     clDeviceInfo = clDeviceInfoStream.str();
348*6467f958SSadaf Ebrahimi 
349*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
350*6467f958SSadaf Ebrahimi }
351*6467f958SSadaf Ebrahimi 
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)352*6467f958SSadaf Ebrahimi static int write_cl_device_info(const cl_device_id device,
353*6467f958SSadaf Ebrahimi                                 const cl_uint device_address_space_size,
354*6467f958SSadaf Ebrahimi                                 const CompilationMode compilationMode,
355*6467f958SSadaf Ebrahimi                                 std::string &clDeviceInfoFilename)
356*6467f958SSadaf Ebrahimi {
357*6467f958SSadaf Ebrahimi     std::string clDeviceInfo;
358*6467f958SSadaf Ebrahimi     int error = get_cl_device_info_str(device, device_address_space_size,
359*6467f958SSadaf Ebrahimi                                        compilationMode, clDeviceInfo);
360*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS)
361*6467f958SSadaf Ebrahimi     {
362*6467f958SSadaf Ebrahimi         return error;
363*6467f958SSadaf Ebrahimi     }
364*6467f958SSadaf Ebrahimi 
365*6467f958SSadaf Ebrahimi     cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
366*6467f958SSadaf Ebrahimi 
367*6467f958SSadaf Ebrahimi     /* Get the filename for the clDeviceInfo file.
368*6467f958SSadaf Ebrahimi      * Note: the file includes the hash on its content, so it is usually
369*6467f958SSadaf Ebrahimi      * unnecessary to delete it. */
370*6467f958SSadaf Ebrahimi     std::ostringstream clDeviceInfoFilenameStream;
371*6467f958SSadaf Ebrahimi     clDeviceInfoFilenameStream << gCompilationCachePath << slash
372*6467f958SSadaf Ebrahimi                                << "clDeviceInfo-";
373*6467f958SSadaf Ebrahimi     clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
374*6467f958SSadaf Ebrahimi                                << crc << ".txt";
375*6467f958SSadaf Ebrahimi 
376*6467f958SSadaf Ebrahimi     clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
377*6467f958SSadaf Ebrahimi 
378*6467f958SSadaf Ebrahimi     if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
379*6467f958SSadaf Ebrahimi     {
380*6467f958SSadaf Ebrahimi         /* The CL device info file has already been created.
381*6467f958SSadaf Ebrahimi          * Nothing to do. */
382*6467f958SSadaf Ebrahimi         return 0;
383*6467f958SSadaf Ebrahimi     }
384*6467f958SSadaf Ebrahimi 
385*6467f958SSadaf Ebrahimi     /* The file does not exist or its length is not as expected.
386*6467f958SSadaf Ebrahimi      * Create/overwrite it. */
387*6467f958SSadaf Ebrahimi     std::ofstream ofs(clDeviceInfoFilename);
388*6467f958SSadaf Ebrahimi     if (!ofs.good())
389*6467f958SSadaf Ebrahimi     {
390*6467f958SSadaf Ebrahimi         log_info("OfflineCompiler: can't create CL device info file: %s\n",
391*6467f958SSadaf Ebrahimi                  clDeviceInfoFilename.c_str());
392*6467f958SSadaf Ebrahimi         return -1;
393*6467f958SSadaf Ebrahimi     }
394*6467f958SSadaf Ebrahimi     ofs << clDeviceInfo;
395*6467f958SSadaf Ebrahimi     ofs.close();
396*6467f958SSadaf Ebrahimi 
397*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
398*6467f958SSadaf Ebrahimi }
399*6467f958SSadaf Ebrahimi 
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)400*6467f958SSadaf Ebrahimi static std::string get_offline_compilation_command(
401*6467f958SSadaf Ebrahimi     const cl_uint device_address_space_size,
402*6467f958SSadaf Ebrahimi     const CompilationMode compilationMode, const std::string &bOptions,
403*6467f958SSadaf Ebrahimi     const std::string &sourceFilename, const std::string &outputFilename,
404*6467f958SSadaf Ebrahimi     const std::string &clDeviceInfoFilename)
405*6467f958SSadaf Ebrahimi {
406*6467f958SSadaf Ebrahimi     std::ostringstream wrapperOptions;
407*6467f958SSadaf Ebrahimi 
408*6467f958SSadaf Ebrahimi     wrapperOptions << gCompilationProgram
409*6467f958SSadaf Ebrahimi                    << " --mode=" << get_compilation_mode_str(compilationMode)
410*6467f958SSadaf Ebrahimi                    << " --source=" << sourceFilename
411*6467f958SSadaf Ebrahimi                    << " --output=" << outputFilename
412*6467f958SSadaf Ebrahimi                    << " --cl-device-info=" << clDeviceInfoFilename;
413*6467f958SSadaf Ebrahimi 
414*6467f958SSadaf Ebrahimi     if (bOptions != "")
415*6467f958SSadaf Ebrahimi     {
416*6467f958SSadaf Ebrahimi         // Add build options passed to this function
417*6467f958SSadaf Ebrahimi         wrapperOptions << " -- " << bOptions;
418*6467f958SSadaf Ebrahimi     }
419*6467f958SSadaf Ebrahimi 
420*6467f958SSadaf Ebrahimi     return wrapperOptions.str();
421*6467f958SSadaf Ebrahimi }
422*6467f958SSadaf Ebrahimi 
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename)423*6467f958SSadaf Ebrahimi static int invoke_offline_compiler(const cl_device_id device,
424*6467f958SSadaf Ebrahimi                                    const cl_uint device_address_space_size,
425*6467f958SSadaf Ebrahimi                                    const CompilationMode compilationMode,
426*6467f958SSadaf Ebrahimi                                    const std::string &bOptions,
427*6467f958SSadaf Ebrahimi                                    const std::string &sourceFilename,
428*6467f958SSadaf Ebrahimi                                    const std::string &outputFilename)
429*6467f958SSadaf Ebrahimi {
430*6467f958SSadaf Ebrahimi     std::string runString;
431*6467f958SSadaf Ebrahimi     std::string clDeviceInfoFilename;
432*6467f958SSadaf Ebrahimi 
433*6467f958SSadaf Ebrahimi     // See cl_offline_compiler-interface.txt for a description of the
434*6467f958SSadaf Ebrahimi     // format of the CL device information file generated below, and
435*6467f958SSadaf Ebrahimi     // the internal command line interface for invoking the offline
436*6467f958SSadaf Ebrahimi     // compiler.
437*6467f958SSadaf Ebrahimi 
438*6467f958SSadaf Ebrahimi     cl_int err = write_cl_device_info(device, device_address_space_size,
439*6467f958SSadaf Ebrahimi                                       compilationMode, clDeviceInfoFilename);
440*6467f958SSadaf Ebrahimi     if (err != CL_SUCCESS)
441*6467f958SSadaf Ebrahimi     {
442*6467f958SSadaf Ebrahimi         log_error("Failed writing CL device info file\n");
443*6467f958SSadaf Ebrahimi         return err;
444*6467f958SSadaf Ebrahimi     }
445*6467f958SSadaf Ebrahimi 
446*6467f958SSadaf Ebrahimi     runString = get_offline_compilation_command(
447*6467f958SSadaf Ebrahimi         device_address_space_size, compilationMode, bOptions, sourceFilename,
448*6467f958SSadaf Ebrahimi         outputFilename, clDeviceInfoFilename);
449*6467f958SSadaf Ebrahimi 
450*6467f958SSadaf Ebrahimi     // execute script
451*6467f958SSadaf Ebrahimi     log_info("Executing command: %s\n", runString.c_str());
452*6467f958SSadaf Ebrahimi     fflush(stdout);
453*6467f958SSadaf Ebrahimi     int returnCode = system(runString.c_str());
454*6467f958SSadaf Ebrahimi     if (returnCode != 0)
455*6467f958SSadaf Ebrahimi     {
456*6467f958SSadaf Ebrahimi         log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
457*6467f958SSadaf Ebrahimi         return CL_COMPILE_PROGRAM_FAILURE;
458*6467f958SSadaf Ebrahimi     }
459*6467f958SSadaf Ebrahimi 
460*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
461*6467f958SSadaf Ebrahimi }
462*6467f958SSadaf Ebrahimi 
get_first_device_id(const cl_context context,cl_device_id & device)463*6467f958SSadaf Ebrahimi static cl_int get_first_device_id(const cl_context context,
464*6467f958SSadaf Ebrahimi                                   cl_device_id &device)
465*6467f958SSadaf Ebrahimi {
466*6467f958SSadaf Ebrahimi     cl_uint numDevices = 0;
467*6467f958SSadaf Ebrahimi     cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
468*6467f958SSadaf Ebrahimi                                     sizeof(cl_uint), &numDevices, NULL);
469*6467f958SSadaf Ebrahimi     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
470*6467f958SSadaf Ebrahimi 
471*6467f958SSadaf Ebrahimi     if (numDevices == 0)
472*6467f958SSadaf Ebrahimi     {
473*6467f958SSadaf Ebrahimi         log_error("ERROR: No CL devices found\n");
474*6467f958SSadaf Ebrahimi         return -1;
475*6467f958SSadaf Ebrahimi     }
476*6467f958SSadaf Ebrahimi 
477*6467f958SSadaf Ebrahimi     std::vector<cl_device_id> devices(numDevices, 0);
478*6467f958SSadaf Ebrahimi     error =
479*6467f958SSadaf Ebrahimi         clGetContextInfo(context, CL_CONTEXT_DEVICES,
480*6467f958SSadaf Ebrahimi                          numDevices * sizeof(cl_device_id), &devices[0], NULL);
481*6467f958SSadaf Ebrahimi     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
482*6467f958SSadaf Ebrahimi 
483*6467f958SSadaf Ebrahimi     device = devices[0];
484*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
485*6467f958SSadaf Ebrahimi }
486*6467f958SSadaf Ebrahimi 
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)487*6467f958SSadaf Ebrahimi static cl_int get_device_address_bits(const cl_device_id device,
488*6467f958SSadaf Ebrahimi                                       cl_uint &device_address_space_size)
489*6467f958SSadaf Ebrahimi {
490*6467f958SSadaf Ebrahimi     cl_int error =
491*6467f958SSadaf Ebrahimi         clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
492*6467f958SSadaf Ebrahimi                         &device_address_space_size, NULL);
493*6467f958SSadaf Ebrahimi     test_error(error, "Unable to obtain device address bits");
494*6467f958SSadaf Ebrahimi 
495*6467f958SSadaf Ebrahimi     if (device_address_space_size != 32 && device_address_space_size != 64)
496*6467f958SSadaf Ebrahimi     {
497*6467f958SSadaf Ebrahimi         log_error("ERROR: Unexpected number of device address bits: %u\n",
498*6467f958SSadaf Ebrahimi                   device_address_space_size);
499*6467f958SSadaf Ebrahimi         return -1;
500*6467f958SSadaf Ebrahimi     }
501*6467f958SSadaf Ebrahimi 
502*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
503*6467f958SSadaf Ebrahimi }
504*6467f958SSadaf Ebrahimi 
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)505*6467f958SSadaf Ebrahimi static int get_offline_compiler_output(
506*6467f958SSadaf Ebrahimi     std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
507*6467f958SSadaf Ebrahimi     const CompilationMode compilationMode, const std::string &bOptions,
508*6467f958SSadaf Ebrahimi     const std::string &kernelPath, const std::string &kernelNamePrefix)
509*6467f958SSadaf Ebrahimi {
510*6467f958SSadaf Ebrahimi     std::string sourceFilename =
511*6467f958SSadaf Ebrahimi         get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
512*6467f958SSadaf Ebrahimi     std::string outputFilename = get_binary_filename_with_path(
513*6467f958SSadaf Ebrahimi         compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
514*6467f958SSadaf Ebrahimi 
515*6467f958SSadaf Ebrahimi     ifs.open(outputFilename.c_str(), std::ios::binary);
516*6467f958SSadaf Ebrahimi     if (!ifs.good())
517*6467f958SSadaf Ebrahimi     {
518*6467f958SSadaf Ebrahimi         std::string file_type =
519*6467f958SSadaf Ebrahimi             get_offline_compilation_file_type_str(compilationMode);
520*6467f958SSadaf Ebrahimi         if (gCompilationCacheMode == kCacheModeForceRead)
521*6467f958SSadaf Ebrahimi         {
522*6467f958SSadaf Ebrahimi             log_info("OfflineCompiler: can't open cached %s file: %s\n",
523*6467f958SSadaf Ebrahimi                      file_type.c_str(), outputFilename.c_str());
524*6467f958SSadaf Ebrahimi             return -1;
525*6467f958SSadaf Ebrahimi         }
526*6467f958SSadaf Ebrahimi         else
527*6467f958SSadaf Ebrahimi         {
528*6467f958SSadaf Ebrahimi             int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
529*6467f958SSadaf Ebrahimi                                                 compilationMode, bOptions,
530*6467f958SSadaf Ebrahimi                                                 sourceFilename, outputFilename);
531*6467f958SSadaf Ebrahimi             if (error != CL_SUCCESS) return error;
532*6467f958SSadaf Ebrahimi 
533*6467f958SSadaf Ebrahimi             // open output file for reading
534*6467f958SSadaf Ebrahimi             ifs.open(outputFilename.c_str(), std::ios::binary);
535*6467f958SSadaf Ebrahimi             if (!ifs.good())
536*6467f958SSadaf Ebrahimi             {
537*6467f958SSadaf Ebrahimi                 log_info("OfflineCompiler: can't read generated %s file: %s\n",
538*6467f958SSadaf Ebrahimi                          file_type.c_str(), outputFilename.c_str());
539*6467f958SSadaf Ebrahimi                 return -1;
540*6467f958SSadaf Ebrahimi             }
541*6467f958SSadaf Ebrahimi         }
542*6467f958SSadaf Ebrahimi     }
543*6467f958SSadaf Ebrahimi 
544*6467f958SSadaf Ebrahimi     if (compilationMode == kSpir_v && !gDisableSPIRVValidation)
545*6467f958SSadaf Ebrahimi     {
546*6467f958SSadaf Ebrahimi         std::string runString = gSPIRVValidator + " " + outputFilename;
547*6467f958SSadaf Ebrahimi 
548*6467f958SSadaf Ebrahimi         int returnCode = system(runString.c_str());
549*6467f958SSadaf Ebrahimi         if (returnCode == -1)
550*6467f958SSadaf Ebrahimi         {
551*6467f958SSadaf Ebrahimi             log_error("Error: failed to invoke SPIR-V validator\n");
552*6467f958SSadaf Ebrahimi             return CL_COMPILE_PROGRAM_FAILURE;
553*6467f958SSadaf Ebrahimi         }
554*6467f958SSadaf Ebrahimi         else if (returnCode != 0)
555*6467f958SSadaf Ebrahimi         {
556*6467f958SSadaf Ebrahimi             log_error(
557*6467f958SSadaf Ebrahimi                 "Failed to validate SPIR-V file %s: system() returned 0x%x\n",
558*6467f958SSadaf Ebrahimi                 outputFilename.c_str(), returnCode);
559*6467f958SSadaf Ebrahimi             return CL_COMPILE_PROGRAM_FAILURE;
560*6467f958SSadaf Ebrahimi         }
561*6467f958SSadaf Ebrahimi     }
562*6467f958SSadaf Ebrahimi 
563*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
564*6467f958SSadaf Ebrahimi }
565*6467f958SSadaf Ebrahimi 
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,CompilationMode compilationMode)566*6467f958SSadaf Ebrahimi static int create_single_kernel_helper_create_program_offline(
567*6467f958SSadaf Ebrahimi     cl_context context, cl_device_id device, cl_program *outProgram,
568*6467f958SSadaf Ebrahimi     unsigned int numKernelLines, const char *const *kernelProgram,
569*6467f958SSadaf Ebrahimi     const char *buildOptions, CompilationMode compilationMode)
570*6467f958SSadaf Ebrahimi {
571*6467f958SSadaf Ebrahimi     if (kCacheModeDumpCl == gCompilationCacheMode)
572*6467f958SSadaf Ebrahimi     {
573*6467f958SSadaf Ebrahimi         return -1;
574*6467f958SSadaf Ebrahimi     }
575*6467f958SSadaf Ebrahimi 
576*6467f958SSadaf Ebrahimi     // Get device CL_DEVICE_ADDRESS_BITS
577*6467f958SSadaf Ebrahimi     int error;
578*6467f958SSadaf Ebrahimi     cl_uint device_address_space_size = 0;
579*6467f958SSadaf Ebrahimi     if (device == NULL)
580*6467f958SSadaf Ebrahimi     {
581*6467f958SSadaf Ebrahimi         error = get_first_device_id(context, device);
582*6467f958SSadaf Ebrahimi         test_error(error, "Failed to get device ID for first device");
583*6467f958SSadaf Ebrahimi     }
584*6467f958SSadaf Ebrahimi     error = get_device_address_bits(device, device_address_space_size);
585*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS) return error;
586*6467f958SSadaf Ebrahimi 
587*6467f958SSadaf Ebrahimi     // set build options
588*6467f958SSadaf Ebrahimi     std::string bOptions;
589*6467f958SSadaf Ebrahimi     bOptions += buildOptions ? std::string(buildOptions) : "";
590*6467f958SSadaf Ebrahimi 
591*6467f958SSadaf Ebrahimi     std::string kernelName =
592*6467f958SSadaf Ebrahimi         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
593*6467f958SSadaf Ebrahimi 
594*6467f958SSadaf Ebrahimi 
595*6467f958SSadaf Ebrahimi     std::ifstream ifs;
596*6467f958SSadaf Ebrahimi     error = get_offline_compiler_output(ifs, device, device_address_space_size,
597*6467f958SSadaf Ebrahimi                                         compilationMode, bOptions,
598*6467f958SSadaf Ebrahimi                                         gCompilationCachePath, kernelName);
599*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS) return error;
600*6467f958SSadaf Ebrahimi 
601*6467f958SSadaf Ebrahimi     ifs.seekg(0, ifs.end);
602*6467f958SSadaf Ebrahimi     size_t length = static_cast<size_t>(ifs.tellg());
603*6467f958SSadaf Ebrahimi     ifs.seekg(0, ifs.beg);
604*6467f958SSadaf Ebrahimi 
605*6467f958SSadaf Ebrahimi     // treat modifiedProgram as input for clCreateProgramWithBinary
606*6467f958SSadaf Ebrahimi     if (compilationMode == kBinary)
607*6467f958SSadaf Ebrahimi     {
608*6467f958SSadaf Ebrahimi         // read binary from file:
609*6467f958SSadaf Ebrahimi         std::vector<unsigned char> modifiedKernelBuf(length);
610*6467f958SSadaf Ebrahimi 
611*6467f958SSadaf Ebrahimi         ifs.read((char *)&modifiedKernelBuf[0], length);
612*6467f958SSadaf Ebrahimi         ifs.close();
613*6467f958SSadaf Ebrahimi 
614*6467f958SSadaf Ebrahimi         size_t lengths = modifiedKernelBuf.size();
615*6467f958SSadaf Ebrahimi         const unsigned char *binaries = { &modifiedKernelBuf[0] };
616*6467f958SSadaf Ebrahimi         log_info("offlineCompiler: clCreateProgramWithSource replaced with "
617*6467f958SSadaf Ebrahimi                  "clCreateProgramWithBinary\n");
618*6467f958SSadaf Ebrahimi         *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
619*6467f958SSadaf Ebrahimi                                                 &binaries, NULL, &error);
620*6467f958SSadaf Ebrahimi         if (*outProgram == NULL || error != CL_SUCCESS)
621*6467f958SSadaf Ebrahimi         {
622*6467f958SSadaf Ebrahimi             print_error(error, "clCreateProgramWithBinary failed");
623*6467f958SSadaf Ebrahimi             return error;
624*6467f958SSadaf Ebrahimi         }
625*6467f958SSadaf Ebrahimi     }
626*6467f958SSadaf Ebrahimi     // treat modifiedProgram as input for clCreateProgramWithIL
627*6467f958SSadaf Ebrahimi     else if (compilationMode == kSpir_v)
628*6467f958SSadaf Ebrahimi     {
629*6467f958SSadaf Ebrahimi         // read spir-v from file:
630*6467f958SSadaf Ebrahimi         std::vector<unsigned char> modifiedKernelBuf(length);
631*6467f958SSadaf Ebrahimi 
632*6467f958SSadaf Ebrahimi         ifs.read((char *)&modifiedKernelBuf[0], length);
633*6467f958SSadaf Ebrahimi         ifs.close();
634*6467f958SSadaf Ebrahimi 
635*6467f958SSadaf Ebrahimi         size_t length = modifiedKernelBuf.size();
636*6467f958SSadaf Ebrahimi         log_info("offlineCompiler: clCreateProgramWithSource replaced with "
637*6467f958SSadaf Ebrahimi                  "clCreateProgramWithIL\n");
638*6467f958SSadaf Ebrahimi         if (gCoreILProgram)
639*6467f958SSadaf Ebrahimi         {
640*6467f958SSadaf Ebrahimi             *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
641*6467f958SSadaf Ebrahimi                                                 length, &error);
642*6467f958SSadaf Ebrahimi         }
643*6467f958SSadaf Ebrahimi         else
644*6467f958SSadaf Ebrahimi         {
645*6467f958SSadaf Ebrahimi             cl_platform_id platform;
646*6467f958SSadaf Ebrahimi             error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
647*6467f958SSadaf Ebrahimi                                     sizeof(cl_platform_id), &platform, NULL);
648*6467f958SSadaf Ebrahimi             test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
649*6467f958SSadaf Ebrahimi 
650*6467f958SSadaf Ebrahimi             clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
651*6467f958SSadaf Ebrahimi             clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
652*6467f958SSadaf Ebrahimi                 clGetExtensionFunctionAddressForPlatform(
653*6467f958SSadaf Ebrahimi                     platform, "clCreateProgramWithILKHR");
654*6467f958SSadaf Ebrahimi             if (clCreateProgramWithILKHR == NULL)
655*6467f958SSadaf Ebrahimi             {
656*6467f958SSadaf Ebrahimi                 log_error(
657*6467f958SSadaf Ebrahimi                     "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
658*6467f958SSadaf Ebrahimi                 return -1;
659*6467f958SSadaf Ebrahimi             }
660*6467f958SSadaf Ebrahimi             *outProgram = clCreateProgramWithILKHR(
661*6467f958SSadaf Ebrahimi                 context, &modifiedKernelBuf[0], length, &error);
662*6467f958SSadaf Ebrahimi         }
663*6467f958SSadaf Ebrahimi 
664*6467f958SSadaf Ebrahimi         if (*outProgram == NULL || error != CL_SUCCESS)
665*6467f958SSadaf Ebrahimi         {
666*6467f958SSadaf Ebrahimi             if (gCoreILProgram)
667*6467f958SSadaf Ebrahimi             {
668*6467f958SSadaf Ebrahimi                 print_error(error, "clCreateProgramWithIL failed");
669*6467f958SSadaf Ebrahimi             }
670*6467f958SSadaf Ebrahimi             else
671*6467f958SSadaf Ebrahimi             {
672*6467f958SSadaf Ebrahimi                 print_error(error, "clCreateProgramWithILKHR failed");
673*6467f958SSadaf Ebrahimi             }
674*6467f958SSadaf Ebrahimi             return error;
675*6467f958SSadaf Ebrahimi         }
676*6467f958SSadaf Ebrahimi     }
677*6467f958SSadaf Ebrahimi 
678*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
679*6467f958SSadaf Ebrahimi }
680*6467f958SSadaf Ebrahimi 
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,CompilationMode compilationMode)681*6467f958SSadaf Ebrahimi static int create_single_kernel_helper_create_program(
682*6467f958SSadaf Ebrahimi     cl_context context, cl_device_id device, cl_program *outProgram,
683*6467f958SSadaf Ebrahimi     unsigned int numKernelLines, const char **kernelProgram,
684*6467f958SSadaf Ebrahimi     const char *buildOptions, CompilationMode compilationMode)
685*6467f958SSadaf Ebrahimi {
686*6467f958SSadaf Ebrahimi     std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
687*6467f958SSadaf Ebrahimi 
688*6467f958SSadaf Ebrahimi     std::string filePrefix =
689*6467f958SSadaf Ebrahimi         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
690*6467f958SSadaf Ebrahimi     bool shouldSaveToDisk = should_save_kernel_source_to_disk(
691*6467f958SSadaf Ebrahimi         compilationMode, gCompilationCacheMode, gCompilationCachePath,
692*6467f958SSadaf Ebrahimi         filePrefix);
693*6467f958SSadaf Ebrahimi 
694*6467f958SSadaf Ebrahimi     if (shouldSaveToDisk)
695*6467f958SSadaf Ebrahimi     {
696*6467f958SSadaf Ebrahimi         if (CL_SUCCESS
697*6467f958SSadaf Ebrahimi             != save_kernel_source_and_options_to_disk(
698*6467f958SSadaf Ebrahimi                 numKernelLines, kernelProgram, buildOptions))
699*6467f958SSadaf Ebrahimi         {
700*6467f958SSadaf Ebrahimi             log_error("Unable to dump kernel source to disk");
701*6467f958SSadaf Ebrahimi             return -1;
702*6467f958SSadaf Ebrahimi         }
703*6467f958SSadaf Ebrahimi     }
704*6467f958SSadaf Ebrahimi     if (compilationMode == kOnline)
705*6467f958SSadaf Ebrahimi     {
706*6467f958SSadaf Ebrahimi         int error = CL_SUCCESS;
707*6467f958SSadaf Ebrahimi 
708*6467f958SSadaf Ebrahimi         /* Create the program object from source */
709*6467f958SSadaf Ebrahimi         *outProgram = clCreateProgramWithSource(context, numKernelLines,
710*6467f958SSadaf Ebrahimi                                                 kernelProgram, NULL, &error);
711*6467f958SSadaf Ebrahimi         if (*outProgram == NULL || error != CL_SUCCESS)
712*6467f958SSadaf Ebrahimi         {
713*6467f958SSadaf Ebrahimi             print_error(error, "clCreateProgramWithSource failed");
714*6467f958SSadaf Ebrahimi             return error;
715*6467f958SSadaf Ebrahimi         }
716*6467f958SSadaf Ebrahimi         return CL_SUCCESS;
717*6467f958SSadaf Ebrahimi     }
718*6467f958SSadaf Ebrahimi     else
719*6467f958SSadaf Ebrahimi     {
720*6467f958SSadaf Ebrahimi         return create_single_kernel_helper_create_program_offline(
721*6467f958SSadaf Ebrahimi             context, device, outProgram, numKernelLines, kernelProgram,
722*6467f958SSadaf Ebrahimi             buildOptions, compilationMode);
723*6467f958SSadaf Ebrahimi     }
724*6467f958SSadaf Ebrahimi }
725*6467f958SSadaf Ebrahimi 
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)726*6467f958SSadaf Ebrahimi int create_single_kernel_helper_create_program(cl_context context,
727*6467f958SSadaf Ebrahimi                                                cl_program *outProgram,
728*6467f958SSadaf Ebrahimi                                                unsigned int numKernelLines,
729*6467f958SSadaf Ebrahimi                                                const char **kernelProgram,
730*6467f958SSadaf Ebrahimi                                                const char *buildOptions)
731*6467f958SSadaf Ebrahimi {
732*6467f958SSadaf Ebrahimi     return create_single_kernel_helper_create_program(
733*6467f958SSadaf Ebrahimi         context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
734*6467f958SSadaf Ebrahimi         gCompilationMode);
735*6467f958SSadaf Ebrahimi }
736*6467f958SSadaf Ebrahimi 
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)737*6467f958SSadaf Ebrahimi int create_single_kernel_helper_create_program_for_device(
738*6467f958SSadaf Ebrahimi     cl_context context, cl_device_id device, cl_program *outProgram,
739*6467f958SSadaf Ebrahimi     unsigned int numKernelLines, const char **kernelProgram,
740*6467f958SSadaf Ebrahimi     const char *buildOptions)
741*6467f958SSadaf Ebrahimi {
742*6467f958SSadaf Ebrahimi     return create_single_kernel_helper_create_program(
743*6467f958SSadaf Ebrahimi         context, device, outProgram, numKernelLines, kernelProgram,
744*6467f958SSadaf Ebrahimi         buildOptions, gCompilationMode);
745*6467f958SSadaf Ebrahimi }
746*6467f958SSadaf Ebrahimi 
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)747*6467f958SSadaf Ebrahimi int create_single_kernel_helper_with_build_options(
748*6467f958SSadaf Ebrahimi     cl_context context, cl_program *outProgram, cl_kernel *outKernel,
749*6467f958SSadaf Ebrahimi     unsigned int numKernelLines, const char **kernelProgram,
750*6467f958SSadaf Ebrahimi     const char *kernelName, const char *buildOptions)
751*6467f958SSadaf Ebrahimi {
752*6467f958SSadaf Ebrahimi     return create_single_kernel_helper(context, outProgram, outKernel,
753*6467f958SSadaf Ebrahimi                                        numKernelLines, kernelProgram,
754*6467f958SSadaf Ebrahimi                                        kernelName, buildOptions);
755*6467f958SSadaf Ebrahimi }
756*6467f958SSadaf Ebrahimi 
757*6467f958SSadaf Ebrahimi // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)758*6467f958SSadaf Ebrahimi int create_single_kernel_helper(cl_context context, cl_program *outProgram,
759*6467f958SSadaf Ebrahimi                                 cl_kernel *outKernel,
760*6467f958SSadaf Ebrahimi                                 unsigned int numKernelLines,
761*6467f958SSadaf Ebrahimi                                 const char **kernelProgram,
762*6467f958SSadaf Ebrahimi                                 const char *kernelName,
763*6467f958SSadaf Ebrahimi                                 const char *buildOptions)
764*6467f958SSadaf Ebrahimi {
765*6467f958SSadaf Ebrahimi     // For the logic that automatically adds -cl-std it is much cleaner if the
766*6467f958SSadaf Ebrahimi     // build options have RAII. This buffer will store the potentially updated
767*6467f958SSadaf Ebrahimi     // build options, in which case buildOptions will point at the string owned
768*6467f958SSadaf Ebrahimi     // by this buffer.
769*6467f958SSadaf Ebrahimi     std::string build_options_internal{ buildOptions ? buildOptions : "" };
770*6467f958SSadaf Ebrahimi 
771*6467f958SSadaf Ebrahimi     // Check the build options for the -cl-std option.
772*6467f958SSadaf Ebrahimi     if (!buildOptions || !strstr(buildOptions, "-cl-std"))
773*6467f958SSadaf Ebrahimi     {
774*6467f958SSadaf Ebrahimi         // If the build option isn't present add it using the latest OpenCL-C
775*6467f958SSadaf Ebrahimi         // version supported by the device. This allows calling code to force a
776*6467f958SSadaf Ebrahimi         // particular CL C version if it is required, but also means that
777*6467f958SSadaf Ebrahimi         // callers need not specify a version if they want to assume the most
778*6467f958SSadaf Ebrahimi         // recent CL C.
779*6467f958SSadaf Ebrahimi 
780*6467f958SSadaf Ebrahimi         auto version = get_max_OpenCL_C_for_context(context);
781*6467f958SSadaf Ebrahimi 
782*6467f958SSadaf Ebrahimi         std::string cl_std{};
783*6467f958SSadaf Ebrahimi         if (version >= Version(3, 0))
784*6467f958SSadaf Ebrahimi         {
785*6467f958SSadaf Ebrahimi             cl_std = "-cl-std=CL3.0";
786*6467f958SSadaf Ebrahimi         }
787*6467f958SSadaf Ebrahimi         else if (version >= Version(2, 0) && version < Version(3, 0))
788*6467f958SSadaf Ebrahimi         {
789*6467f958SSadaf Ebrahimi             cl_std = "-cl-std=CL2.0";
790*6467f958SSadaf Ebrahimi         }
791*6467f958SSadaf Ebrahimi         else
792*6467f958SSadaf Ebrahimi         {
793*6467f958SSadaf Ebrahimi             // If the -cl-std build option is not specified, the highest OpenCL
794*6467f958SSadaf Ebrahimi             // C 1.x language version supported by each device is used when
795*6467f958SSadaf Ebrahimi             // compiling the program for each device.
796*6467f958SSadaf Ebrahimi             cl_std = "";
797*6467f958SSadaf Ebrahimi         }
798*6467f958SSadaf Ebrahimi         build_options_internal += ' ';
799*6467f958SSadaf Ebrahimi         build_options_internal += cl_std;
800*6467f958SSadaf Ebrahimi         buildOptions = build_options_internal.c_str();
801*6467f958SSadaf Ebrahimi     }
802*6467f958SSadaf Ebrahimi     int error = create_single_kernel_helper_create_program(
803*6467f958SSadaf Ebrahimi         context, outProgram, numKernelLines, kernelProgram, buildOptions);
804*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS)
805*6467f958SSadaf Ebrahimi     {
806*6467f958SSadaf Ebrahimi         log_error("Create program failed: %d, line: %d\n", error, __LINE__);
807*6467f958SSadaf Ebrahimi         return error;
808*6467f958SSadaf Ebrahimi     }
809*6467f958SSadaf Ebrahimi 
810*6467f958SSadaf Ebrahimi     // Remove offline-compiler-only build options
811*6467f958SSadaf Ebrahimi     std::string newBuildOptions;
812*6467f958SSadaf Ebrahimi     if (buildOptions != NULL)
813*6467f958SSadaf Ebrahimi     {
814*6467f958SSadaf Ebrahimi         newBuildOptions = buildOptions;
815*6467f958SSadaf Ebrahimi         std::string offlineCompierOptions[] = {
816*6467f958SSadaf Ebrahimi             "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
817*6467f958SSadaf Ebrahimi         };
818*6467f958SSadaf Ebrahimi         for (auto &s : offlineCompierOptions)
819*6467f958SSadaf Ebrahimi         {
820*6467f958SSadaf Ebrahimi             std::string::size_type i = newBuildOptions.find(s);
821*6467f958SSadaf Ebrahimi             if (i != std::string::npos) newBuildOptions.erase(i, s.length());
822*6467f958SSadaf Ebrahimi         }
823*6467f958SSadaf Ebrahimi     }
824*6467f958SSadaf Ebrahimi     // Build program and create kernel
825*6467f958SSadaf Ebrahimi     return build_program_create_kernel_helper(
826*6467f958SSadaf Ebrahimi         context, outProgram, outKernel, numKernelLines, kernelProgram,
827*6467f958SSadaf Ebrahimi         kernelName, newBuildOptions.c_str());
828*6467f958SSadaf Ebrahimi }
829*6467f958SSadaf Ebrahimi 
830*6467f958SSadaf Ebrahimi // Builds OpenCL C/C++ program and creates
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)831*6467f958SSadaf Ebrahimi int build_program_create_kernel_helper(
832*6467f958SSadaf Ebrahimi     cl_context context, cl_program *outProgram, cl_kernel *outKernel,
833*6467f958SSadaf Ebrahimi     unsigned int numKernelLines, const char **kernelProgram,
834*6467f958SSadaf Ebrahimi     const char *kernelName, const char *buildOptions)
835*6467f958SSadaf Ebrahimi {
836*6467f958SSadaf Ebrahimi     int error;
837*6467f958SSadaf Ebrahimi     /* Compile the program */
838*6467f958SSadaf Ebrahimi     int buildProgramFailed = 0;
839*6467f958SSadaf Ebrahimi     int printedSource = 0;
840*6467f958SSadaf Ebrahimi     error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
841*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS)
842*6467f958SSadaf Ebrahimi     {
843*6467f958SSadaf Ebrahimi         unsigned int i;
844*6467f958SSadaf Ebrahimi         print_error(error, "clBuildProgram failed");
845*6467f958SSadaf Ebrahimi         buildProgramFailed = 1;
846*6467f958SSadaf Ebrahimi         printedSource = 1;
847*6467f958SSadaf Ebrahimi         log_error("Build options: %s\n", buildOptions);
848*6467f958SSadaf Ebrahimi         log_error("Original source is: ------------\n");
849*6467f958SSadaf Ebrahimi         for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
850*6467f958SSadaf Ebrahimi     }
851*6467f958SSadaf Ebrahimi 
852*6467f958SSadaf Ebrahimi     // Verify the build status on all devices
853*6467f958SSadaf Ebrahimi     cl_uint deviceCount = 0;
854*6467f958SSadaf Ebrahimi     error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
855*6467f958SSadaf Ebrahimi                              sizeof(deviceCount), &deviceCount, NULL);
856*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS)
857*6467f958SSadaf Ebrahimi     {
858*6467f958SSadaf Ebrahimi         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
859*6467f958SSadaf Ebrahimi         return error;
860*6467f958SSadaf Ebrahimi     }
861*6467f958SSadaf Ebrahimi 
862*6467f958SSadaf Ebrahimi     if (deviceCount == 0)
863*6467f958SSadaf Ebrahimi     {
864*6467f958SSadaf Ebrahimi         log_error("No devices found for program.\n");
865*6467f958SSadaf Ebrahimi         return -1;
866*6467f958SSadaf Ebrahimi     }
867*6467f958SSadaf Ebrahimi 
868*6467f958SSadaf Ebrahimi     cl_device_id *devices =
869*6467f958SSadaf Ebrahimi         (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
870*6467f958SSadaf Ebrahimi     if (NULL == devices) return -1;
871*6467f958SSadaf Ebrahimi     BufferOwningPtr<cl_device_id> devicesBuf(devices);
872*6467f958SSadaf Ebrahimi 
873*6467f958SSadaf Ebrahimi     memset(devices, 0, deviceCount * sizeof(cl_device_id));
874*6467f958SSadaf Ebrahimi     error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
875*6467f958SSadaf Ebrahimi                              sizeof(cl_device_id) * deviceCount, devices, NULL);
876*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS)
877*6467f958SSadaf Ebrahimi     {
878*6467f958SSadaf Ebrahimi         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
879*6467f958SSadaf Ebrahimi         return error;
880*6467f958SSadaf Ebrahimi     }
881*6467f958SSadaf Ebrahimi 
882*6467f958SSadaf Ebrahimi     cl_uint z;
883*6467f958SSadaf Ebrahimi     bool buildFailed = false;
884*6467f958SSadaf Ebrahimi     for (z = 0; z < deviceCount; z++)
885*6467f958SSadaf Ebrahimi     {
886*6467f958SSadaf Ebrahimi         char deviceName[4096] = "";
887*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
888*6467f958SSadaf Ebrahimi                                 deviceName, NULL);
889*6467f958SSadaf Ebrahimi         if (error != CL_SUCCESS || deviceName[0] == '\0')
890*6467f958SSadaf Ebrahimi         {
891*6467f958SSadaf Ebrahimi             log_error("Device \"%d\" failed to return a name\n", z);
892*6467f958SSadaf Ebrahimi             print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
893*6467f958SSadaf Ebrahimi         }
894*6467f958SSadaf Ebrahimi 
895*6467f958SSadaf Ebrahimi         cl_build_status buildStatus;
896*6467f958SSadaf Ebrahimi         error = clGetProgramBuildInfo(*outProgram, devices[z],
897*6467f958SSadaf Ebrahimi                                       CL_PROGRAM_BUILD_STATUS,
898*6467f958SSadaf Ebrahimi                                       sizeof(buildStatus), &buildStatus, NULL);
899*6467f958SSadaf Ebrahimi         if (error != CL_SUCCESS)
900*6467f958SSadaf Ebrahimi         {
901*6467f958SSadaf Ebrahimi             print_error(error,
902*6467f958SSadaf Ebrahimi                         "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
903*6467f958SSadaf Ebrahimi             return error;
904*6467f958SSadaf Ebrahimi         }
905*6467f958SSadaf Ebrahimi 
906*6467f958SSadaf Ebrahimi         if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
907*6467f958SSadaf Ebrahimi             && deviceCount == 1)
908*6467f958SSadaf Ebrahimi         {
909*6467f958SSadaf Ebrahimi             buildFailed = true;
910*6467f958SSadaf Ebrahimi             log_error("clBuildProgram returned an error, but buildStatus is "
911*6467f958SSadaf Ebrahimi                       "marked as CL_BUILD_SUCCESS.\n");
912*6467f958SSadaf Ebrahimi         }
913*6467f958SSadaf Ebrahimi 
914*6467f958SSadaf Ebrahimi         if (buildStatus != CL_BUILD_SUCCESS)
915*6467f958SSadaf Ebrahimi         {
916*6467f958SSadaf Ebrahimi 
917*6467f958SSadaf Ebrahimi             char statusString[64] = "";
918*6467f958SSadaf Ebrahimi             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
919*6467f958SSadaf Ebrahimi                 sprintf(statusString, "CL_BUILD_SUCCESS");
920*6467f958SSadaf Ebrahimi             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
921*6467f958SSadaf Ebrahimi                 sprintf(statusString, "CL_BUILD_NONE");
922*6467f958SSadaf Ebrahimi             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
923*6467f958SSadaf Ebrahimi                 sprintf(statusString, "CL_BUILD_ERROR");
924*6467f958SSadaf Ebrahimi             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
925*6467f958SSadaf Ebrahimi                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
926*6467f958SSadaf Ebrahimi             else
927*6467f958SSadaf Ebrahimi                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
928*6467f958SSadaf Ebrahimi 
929*6467f958SSadaf Ebrahimi             if (buildStatus != CL_BUILD_SUCCESS)
930*6467f958SSadaf Ebrahimi                 log_error(
931*6467f958SSadaf Ebrahimi                     "Build not successful for device \"%s\", status: %s\n",
932*6467f958SSadaf Ebrahimi                     deviceName, statusString);
933*6467f958SSadaf Ebrahimi             size_t paramSize = 0;
934*6467f958SSadaf Ebrahimi             error = clGetProgramBuildInfo(*outProgram, devices[z],
935*6467f958SSadaf Ebrahimi                                           CL_PROGRAM_BUILD_LOG, 0, NULL,
936*6467f958SSadaf Ebrahimi                                           &paramSize);
937*6467f958SSadaf Ebrahimi             if (error != CL_SUCCESS)
938*6467f958SSadaf Ebrahimi             {
939*6467f958SSadaf Ebrahimi 
940*6467f958SSadaf Ebrahimi                 print_error(
941*6467f958SSadaf Ebrahimi                     error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
942*6467f958SSadaf Ebrahimi                 return error;
943*6467f958SSadaf Ebrahimi             }
944*6467f958SSadaf Ebrahimi 
945*6467f958SSadaf Ebrahimi             std::string log;
946*6467f958SSadaf Ebrahimi             log.resize(paramSize / sizeof(char));
947*6467f958SSadaf Ebrahimi             error = clGetProgramBuildInfo(*outProgram, devices[z],
948*6467f958SSadaf Ebrahimi                                           CL_PROGRAM_BUILD_LOG, paramSize,
949*6467f958SSadaf Ebrahimi                                           &log[0], NULL);
950*6467f958SSadaf Ebrahimi             if (error != CL_SUCCESS || log[0] == '\0')
951*6467f958SSadaf Ebrahimi             {
952*6467f958SSadaf Ebrahimi                 log_error("Device %d (%s) failed to return a build log\n", z,
953*6467f958SSadaf Ebrahimi                           deviceName);
954*6467f958SSadaf Ebrahimi                 if (error)
955*6467f958SSadaf Ebrahimi                 {
956*6467f958SSadaf Ebrahimi                     print_error(
957*6467f958SSadaf Ebrahimi                         error,
958*6467f958SSadaf Ebrahimi                         "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
959*6467f958SSadaf Ebrahimi                     return error;
960*6467f958SSadaf Ebrahimi                 }
961*6467f958SSadaf Ebrahimi                 else
962*6467f958SSadaf Ebrahimi                 {
963*6467f958SSadaf Ebrahimi                     log_error("clGetProgramBuildInfo returned an empty log.\n");
964*6467f958SSadaf Ebrahimi                     return -1;
965*6467f958SSadaf Ebrahimi                 }
966*6467f958SSadaf Ebrahimi             }
967*6467f958SSadaf Ebrahimi             // In this case we've already printed out the code above.
968*6467f958SSadaf Ebrahimi             if (!printedSource)
969*6467f958SSadaf Ebrahimi             {
970*6467f958SSadaf Ebrahimi                 unsigned int i;
971*6467f958SSadaf Ebrahimi                 log_error("Original source is: ------------\n");
972*6467f958SSadaf Ebrahimi                 for (i = 0; i < numKernelLines; i++)
973*6467f958SSadaf Ebrahimi                     log_error("%s", kernelProgram[i]);
974*6467f958SSadaf Ebrahimi                 printedSource = 1;
975*6467f958SSadaf Ebrahimi             }
976*6467f958SSadaf Ebrahimi             log_error("Build log for device \"%s\" is: ------------\n",
977*6467f958SSadaf Ebrahimi                       deviceName);
978*6467f958SSadaf Ebrahimi             log_error("%s\n", log.c_str());
979*6467f958SSadaf Ebrahimi             log_error("\n----------\n");
980*6467f958SSadaf Ebrahimi             return -1;
981*6467f958SSadaf Ebrahimi         }
982*6467f958SSadaf Ebrahimi     }
983*6467f958SSadaf Ebrahimi 
984*6467f958SSadaf Ebrahimi     if (buildFailed)
985*6467f958SSadaf Ebrahimi     {
986*6467f958SSadaf Ebrahimi         return -1;
987*6467f958SSadaf Ebrahimi     }
988*6467f958SSadaf Ebrahimi 
989*6467f958SSadaf Ebrahimi     /* And create a kernel from it */
990*6467f958SSadaf Ebrahimi     if (kernelName != NULL)
991*6467f958SSadaf Ebrahimi     {
992*6467f958SSadaf Ebrahimi         *outKernel = clCreateKernel(*outProgram, kernelName, &error);
993*6467f958SSadaf Ebrahimi         if (*outKernel == NULL || error != CL_SUCCESS)
994*6467f958SSadaf Ebrahimi         {
995*6467f958SSadaf Ebrahimi             print_error(error, "Unable to create kernel");
996*6467f958SSadaf Ebrahimi             return error;
997*6467f958SSadaf Ebrahimi         }
998*6467f958SSadaf Ebrahimi     }
999*6467f958SSadaf Ebrahimi 
1000*6467f958SSadaf Ebrahimi     return 0;
1001*6467f958SSadaf Ebrahimi }
1002*6467f958SSadaf Ebrahimi 
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)1003*6467f958SSadaf Ebrahimi int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
1004*6467f958SSadaf Ebrahimi                                     size_t *outMaxSize, size_t *outLimits)
1005*6467f958SSadaf Ebrahimi {
1006*6467f958SSadaf Ebrahimi     cl_device_id *devices;
1007*6467f958SSadaf Ebrahimi     size_t size, maxCommonSize = 0;
1008*6467f958SSadaf Ebrahimi     int numDevices, i, j, error;
1009*6467f958SSadaf Ebrahimi     cl_uint numDims;
1010*6467f958SSadaf Ebrahimi     size_t outSize;
1011*6467f958SSadaf Ebrahimi     size_t sizeLimit[] = { 1, 1, 1 };
1012*6467f958SSadaf Ebrahimi 
1013*6467f958SSadaf Ebrahimi 
1014*6467f958SSadaf Ebrahimi     /* Assume fewer than 16 devices will be returned */
1015*6467f958SSadaf Ebrahimi     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
1016*6467f958SSadaf Ebrahimi     test_error(error, "Unable to obtain list of devices size for context");
1017*6467f958SSadaf Ebrahimi     devices = (cl_device_id *)malloc(outSize);
1018*6467f958SSadaf Ebrahimi     BufferOwningPtr<cl_device_id> devicesBuf(devices);
1019*6467f958SSadaf Ebrahimi 
1020*6467f958SSadaf Ebrahimi     error =
1021*6467f958SSadaf Ebrahimi         clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
1022*6467f958SSadaf Ebrahimi     test_error(error, "Unable to obtain list of devices for context");
1023*6467f958SSadaf Ebrahimi 
1024*6467f958SSadaf Ebrahimi     numDevices = (int)(outSize / sizeof(cl_device_id));
1025*6467f958SSadaf Ebrahimi 
1026*6467f958SSadaf Ebrahimi     for (i = 0; i < numDevices; i++)
1027*6467f958SSadaf Ebrahimi     {
1028*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
1029*6467f958SSadaf Ebrahimi                                 sizeof(size), &size, NULL);
1030*6467f958SSadaf Ebrahimi         test_error(error, "Unable to obtain max work group size for device");
1031*6467f958SSadaf Ebrahimi         if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1032*6467f958SSadaf Ebrahimi 
1033*6467f958SSadaf Ebrahimi         error = clGetKernelWorkGroupInfo(kernel, devices[i],
1034*6467f958SSadaf Ebrahimi                                          CL_KERNEL_WORK_GROUP_SIZE,
1035*6467f958SSadaf Ebrahimi                                          sizeof(size), &size, NULL);
1036*6467f958SSadaf Ebrahimi         test_error(
1037*6467f958SSadaf Ebrahimi             error,
1038*6467f958SSadaf Ebrahimi             "Unable to obtain max work group size for device and kernel combo");
1039*6467f958SSadaf Ebrahimi         if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1040*6467f958SSadaf Ebrahimi 
1041*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1042*6467f958SSadaf Ebrahimi                                 sizeof(numDims), &numDims, NULL);
1043*6467f958SSadaf Ebrahimi         test_error(
1044*6467f958SSadaf Ebrahimi             error,
1045*6467f958SSadaf Ebrahimi             "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1046*6467f958SSadaf Ebrahimi         sizeLimit[0] = 1;
1047*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
1048*6467f958SSadaf Ebrahimi                                 numDims * sizeof(size_t), sizeLimit, NULL);
1049*6467f958SSadaf Ebrahimi         test_error(error,
1050*6467f958SSadaf Ebrahimi                    "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1051*6467f958SSadaf Ebrahimi 
1052*6467f958SSadaf Ebrahimi         if (outLimits != NULL)
1053*6467f958SSadaf Ebrahimi         {
1054*6467f958SSadaf Ebrahimi             if (i == 0)
1055*6467f958SSadaf Ebrahimi             {
1056*6467f958SSadaf Ebrahimi                 for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
1057*6467f958SSadaf Ebrahimi             }
1058*6467f958SSadaf Ebrahimi             else
1059*6467f958SSadaf Ebrahimi             {
1060*6467f958SSadaf Ebrahimi                 for (j = 0; j < (int)numDims; j++)
1061*6467f958SSadaf Ebrahimi                 {
1062*6467f958SSadaf Ebrahimi                     if (sizeLimit[j] < outLimits[j])
1063*6467f958SSadaf Ebrahimi                         outLimits[j] = sizeLimit[j];
1064*6467f958SSadaf Ebrahimi                 }
1065*6467f958SSadaf Ebrahimi             }
1066*6467f958SSadaf Ebrahimi         }
1067*6467f958SSadaf Ebrahimi     }
1068*6467f958SSadaf Ebrahimi 
1069*6467f958SSadaf Ebrahimi     *outMaxSize = (unsigned int)maxCommonSize;
1070*6467f958SSadaf Ebrahimi     return 0;
1071*6467f958SSadaf Ebrahimi }
1072*6467f958SSadaf Ebrahimi 
1073*6467f958SSadaf Ebrahimi 
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1074*6467f958SSadaf Ebrahimi extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
1075*6467f958SSadaf Ebrahimi                                                         cl_kernel kernel,
1076*6467f958SSadaf Ebrahimi                                                         size_t *outSize)
1077*6467f958SSadaf Ebrahimi {
1078*6467f958SSadaf Ebrahimi     cl_uint maxDim;
1079*6467f958SSadaf Ebrahimi     size_t maxWgSize;
1080*6467f958SSadaf Ebrahimi     size_t *maxWgSizePerDim;
1081*6467f958SSadaf Ebrahimi     int error;
1082*6467f958SSadaf Ebrahimi 
1083*6467f958SSadaf Ebrahimi     error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
1084*6467f958SSadaf Ebrahimi                                      sizeof(size_t), &maxWgSize, NULL);
1085*6467f958SSadaf Ebrahimi     test_error(error,
1086*6467f958SSadaf Ebrahimi                "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
1087*6467f958SSadaf Ebrahimi 
1088*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1089*6467f958SSadaf Ebrahimi                             sizeof(cl_uint), &maxDim, NULL);
1090*6467f958SSadaf Ebrahimi     test_error(error,
1091*6467f958SSadaf Ebrahimi                "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
1092*6467f958SSadaf Ebrahimi     maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
1093*6467f958SSadaf Ebrahimi     if (!maxWgSizePerDim)
1094*6467f958SSadaf Ebrahimi     {
1095*6467f958SSadaf Ebrahimi         log_error("Unable to allocate maxWgSizePerDim\n");
1096*6467f958SSadaf Ebrahimi         return -1;
1097*6467f958SSadaf Ebrahimi     }
1098*6467f958SSadaf Ebrahimi 
1099*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1100*6467f958SSadaf Ebrahimi                             maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
1101*6467f958SSadaf Ebrahimi     if (error != CL_SUCCESS)
1102*6467f958SSadaf Ebrahimi     {
1103*6467f958SSadaf Ebrahimi         log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
1104*6467f958SSadaf Ebrahimi         free(maxWgSizePerDim);
1105*6467f958SSadaf Ebrahimi         return error;
1106*6467f958SSadaf Ebrahimi     }
1107*6467f958SSadaf Ebrahimi 
1108*6467f958SSadaf Ebrahimi     // "maxWgSize" is limited to that of the first dimension.
1109*6467f958SSadaf Ebrahimi     if (maxWgSize > maxWgSizePerDim[0])
1110*6467f958SSadaf Ebrahimi     {
1111*6467f958SSadaf Ebrahimi         maxWgSize = maxWgSizePerDim[0];
1112*6467f958SSadaf Ebrahimi     }
1113*6467f958SSadaf Ebrahimi 
1114*6467f958SSadaf Ebrahimi     free(maxWgSizePerDim);
1115*6467f958SSadaf Ebrahimi 
1116*6467f958SSadaf Ebrahimi     *outSize = maxWgSize;
1117*6467f958SSadaf Ebrahimi     return 0;
1118*6467f958SSadaf Ebrahimi }
1119*6467f958SSadaf Ebrahimi 
1120*6467f958SSadaf Ebrahimi 
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1121*6467f958SSadaf Ebrahimi int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
1122*6467f958SSadaf Ebrahimi                                    size_t globalThreadSize, size_t *outMaxSize)
1123*6467f958SSadaf Ebrahimi {
1124*6467f958SSadaf Ebrahimi     size_t sizeLimit[3];
1125*6467f958SSadaf Ebrahimi     int error =
1126*6467f958SSadaf Ebrahimi         get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
1127*6467f958SSadaf Ebrahimi     if (error != 0) return error;
1128*6467f958SSadaf Ebrahimi 
1129*6467f958SSadaf Ebrahimi     /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
1130*6467f958SSadaf Ebrahimi      */
1131*6467f958SSadaf Ebrahimi     /* Note for speed, we don't need to check the range of maxCommonSize, b/c
1132*6467f958SSadaf Ebrahimi      once it gets to 1, the modulo test will succeed and break the loop anyway
1133*6467f958SSadaf Ebrahimi    */
1134*6467f958SSadaf Ebrahimi     for (;
1135*6467f958SSadaf Ebrahimi          (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
1136*6467f958SSadaf Ebrahimi          (*outMaxSize)--)
1137*6467f958SSadaf Ebrahimi         ;
1138*6467f958SSadaf Ebrahimi     return 0;
1139*6467f958SSadaf Ebrahimi }
1140*6467f958SSadaf Ebrahimi 
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1141*6467f958SSadaf Ebrahimi int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
1142*6467f958SSadaf Ebrahimi                                       size_t *globalThreadSizes,
1143*6467f958SSadaf Ebrahimi                                       size_t *outMaxSizes)
1144*6467f958SSadaf Ebrahimi {
1145*6467f958SSadaf Ebrahimi     size_t sizeLimit[3];
1146*6467f958SSadaf Ebrahimi     size_t maxSize;
1147*6467f958SSadaf Ebrahimi     int error =
1148*6467f958SSadaf Ebrahimi         get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1149*6467f958SSadaf Ebrahimi     if (error != 0) return error;
1150*6467f958SSadaf Ebrahimi 
1151*6467f958SSadaf Ebrahimi     /* Now find a set of factors, multiplied together less than maxSize, but
1152*6467f958SSadaf Ebrahimi        each a factor of the global sizes */
1153*6467f958SSadaf Ebrahimi 
1154*6467f958SSadaf Ebrahimi     /* Simple case */
1155*6467f958SSadaf Ebrahimi     if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
1156*6467f958SSadaf Ebrahimi     {
1157*6467f958SSadaf Ebrahimi         if (globalThreadSizes[0] <= sizeLimit[0]
1158*6467f958SSadaf Ebrahimi             && globalThreadSizes[1] <= sizeLimit[1])
1159*6467f958SSadaf Ebrahimi         {
1160*6467f958SSadaf Ebrahimi             outMaxSizes[0] = globalThreadSizes[0];
1161*6467f958SSadaf Ebrahimi             outMaxSizes[1] = globalThreadSizes[1];
1162*6467f958SSadaf Ebrahimi             return 0;
1163*6467f958SSadaf Ebrahimi         }
1164*6467f958SSadaf Ebrahimi     }
1165*6467f958SSadaf Ebrahimi 
1166*6467f958SSadaf Ebrahimi     size_t remainingSize, sizeForThisOne;
1167*6467f958SSadaf Ebrahimi     remainingSize = maxSize;
1168*6467f958SSadaf Ebrahimi     int i, j;
1169*6467f958SSadaf Ebrahimi     for (i = 0; i < 2; i++)
1170*6467f958SSadaf Ebrahimi     {
1171*6467f958SSadaf Ebrahimi         if (globalThreadSizes[i] > remainingSize)
1172*6467f958SSadaf Ebrahimi             sizeForThisOne = remainingSize;
1173*6467f958SSadaf Ebrahimi         else
1174*6467f958SSadaf Ebrahimi             sizeForThisOne = globalThreadSizes[i];
1175*6467f958SSadaf Ebrahimi         for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1176*6467f958SSadaf Ebrahimi              || (sizeForThisOne > sizeLimit[i]);
1177*6467f958SSadaf Ebrahimi              sizeForThisOne--)
1178*6467f958SSadaf Ebrahimi             ;
1179*6467f958SSadaf Ebrahimi         outMaxSizes[i] = sizeForThisOne;
1180*6467f958SSadaf Ebrahimi         remainingSize = maxSize;
1181*6467f958SSadaf Ebrahimi         for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1182*6467f958SSadaf Ebrahimi     }
1183*6467f958SSadaf Ebrahimi 
1184*6467f958SSadaf Ebrahimi     return 0;
1185*6467f958SSadaf Ebrahimi }
1186*6467f958SSadaf Ebrahimi 
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1187*6467f958SSadaf Ebrahimi int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
1188*6467f958SSadaf Ebrahimi                                       size_t *globalThreadSizes,
1189*6467f958SSadaf Ebrahimi                                       size_t *outMaxSizes)
1190*6467f958SSadaf Ebrahimi {
1191*6467f958SSadaf Ebrahimi     size_t sizeLimit[3];
1192*6467f958SSadaf Ebrahimi     size_t maxSize;
1193*6467f958SSadaf Ebrahimi     int error =
1194*6467f958SSadaf Ebrahimi         get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1195*6467f958SSadaf Ebrahimi     if (error != 0) return error;
1196*6467f958SSadaf Ebrahimi     /* Now find a set of factors, multiplied together less than maxSize, but
1197*6467f958SSadaf Ebrahimi      each a factor of the global sizes */
1198*6467f958SSadaf Ebrahimi 
1199*6467f958SSadaf Ebrahimi     /* Simple case */
1200*6467f958SSadaf Ebrahimi     if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
1201*6467f958SSadaf Ebrahimi         <= maxSize)
1202*6467f958SSadaf Ebrahimi     {
1203*6467f958SSadaf Ebrahimi         if (globalThreadSizes[0] <= sizeLimit[0]
1204*6467f958SSadaf Ebrahimi             && globalThreadSizes[1] <= sizeLimit[1]
1205*6467f958SSadaf Ebrahimi             && globalThreadSizes[2] <= sizeLimit[2])
1206*6467f958SSadaf Ebrahimi         {
1207*6467f958SSadaf Ebrahimi             outMaxSizes[0] = globalThreadSizes[0];
1208*6467f958SSadaf Ebrahimi             outMaxSizes[1] = globalThreadSizes[1];
1209*6467f958SSadaf Ebrahimi             outMaxSizes[2] = globalThreadSizes[2];
1210*6467f958SSadaf Ebrahimi             return 0;
1211*6467f958SSadaf Ebrahimi         }
1212*6467f958SSadaf Ebrahimi     }
1213*6467f958SSadaf Ebrahimi 
1214*6467f958SSadaf Ebrahimi     size_t remainingSize, sizeForThisOne;
1215*6467f958SSadaf Ebrahimi     remainingSize = maxSize;
1216*6467f958SSadaf Ebrahimi     int i, j;
1217*6467f958SSadaf Ebrahimi     for (i = 0; i < 3; i++)
1218*6467f958SSadaf Ebrahimi     {
1219*6467f958SSadaf Ebrahimi         if (globalThreadSizes[i] > remainingSize)
1220*6467f958SSadaf Ebrahimi             sizeForThisOne = remainingSize;
1221*6467f958SSadaf Ebrahimi         else
1222*6467f958SSadaf Ebrahimi             sizeForThisOne = globalThreadSizes[i];
1223*6467f958SSadaf Ebrahimi         for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1224*6467f958SSadaf Ebrahimi              || (sizeForThisOne > sizeLimit[i]);
1225*6467f958SSadaf Ebrahimi              sizeForThisOne--)
1226*6467f958SSadaf Ebrahimi             ;
1227*6467f958SSadaf Ebrahimi         outMaxSizes[i] = sizeForThisOne;
1228*6467f958SSadaf Ebrahimi         remainingSize = maxSize;
1229*6467f958SSadaf Ebrahimi         for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1230*6467f958SSadaf Ebrahimi     }
1231*6467f958SSadaf Ebrahimi 
1232*6467f958SSadaf Ebrahimi     return 0;
1233*6467f958SSadaf Ebrahimi }
1234*6467f958SSadaf Ebrahimi 
1235*6467f958SSadaf Ebrahimi /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1236*6467f958SSadaf Ebrahimi int is_image_format_supported(cl_context context, cl_mem_flags flags,
1237*6467f958SSadaf Ebrahimi                               cl_mem_object_type image_type,
1238*6467f958SSadaf Ebrahimi                               const cl_image_format *fmt)
1239*6467f958SSadaf Ebrahimi {
1240*6467f958SSadaf Ebrahimi     cl_image_format *list;
1241*6467f958SSadaf Ebrahimi     cl_uint count = 0;
1242*6467f958SSadaf Ebrahimi     cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
1243*6467f958SSadaf Ebrahimi                                             NULL, &count);
1244*6467f958SSadaf Ebrahimi     if (count == 0) return 0;
1245*6467f958SSadaf Ebrahimi 
1246*6467f958SSadaf Ebrahimi     list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
1247*6467f958SSadaf Ebrahimi     if (NULL == list)
1248*6467f958SSadaf Ebrahimi     {
1249*6467f958SSadaf Ebrahimi         log_error("Error: unable to allocate %zu byte buffer for image format "
1250*6467f958SSadaf Ebrahimi                   "list at %s:%d (err = %d)\n",
1251*6467f958SSadaf Ebrahimi                   count * sizeof(cl_image_format), __FILE__, __LINE__, err);
1252*6467f958SSadaf Ebrahimi         return 0;
1253*6467f958SSadaf Ebrahimi     }
1254*6467f958SSadaf Ebrahimi     BufferOwningPtr<cl_image_format> listBuf(list);
1255*6467f958SSadaf Ebrahimi 
1256*6467f958SSadaf Ebrahimi 
1257*6467f958SSadaf Ebrahimi     cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
1258*6467f958SSadaf Ebrahimi                                               list, NULL);
1259*6467f958SSadaf Ebrahimi     if (error)
1260*6467f958SSadaf Ebrahimi     {
1261*6467f958SSadaf Ebrahimi         log_error("Error: failed to obtain supported image type list at %s:%d "
1262*6467f958SSadaf Ebrahimi                   "(err = %d)\n",
1263*6467f958SSadaf Ebrahimi                   __FILE__, __LINE__, err);
1264*6467f958SSadaf Ebrahimi         return 0;
1265*6467f958SSadaf Ebrahimi     }
1266*6467f958SSadaf Ebrahimi 
1267*6467f958SSadaf Ebrahimi     // iterate looking for a match.
1268*6467f958SSadaf Ebrahimi     cl_uint i;
1269*6467f958SSadaf Ebrahimi     for (i = 0; i < count; i++)
1270*6467f958SSadaf Ebrahimi     {
1271*6467f958SSadaf Ebrahimi         if (fmt->image_channel_data_type == list[i].image_channel_data_type
1272*6467f958SSadaf Ebrahimi             && fmt->image_channel_order == list[i].image_channel_order)
1273*6467f958SSadaf Ebrahimi             break;
1274*6467f958SSadaf Ebrahimi     }
1275*6467f958SSadaf Ebrahimi 
1276*6467f958SSadaf Ebrahimi     return (i < count) ? 1 : 0;
1277*6467f958SSadaf Ebrahimi }
1278*6467f958SSadaf Ebrahimi 
1279*6467f958SSadaf Ebrahimi size_t get_pixel_bytes(const cl_image_format *fmt);
get_pixel_bytes(const cl_image_format * fmt)1280*6467f958SSadaf Ebrahimi size_t get_pixel_bytes(const cl_image_format *fmt)
1281*6467f958SSadaf Ebrahimi {
1282*6467f958SSadaf Ebrahimi     size_t chanCount;
1283*6467f958SSadaf Ebrahimi     switch (fmt->image_channel_order)
1284*6467f958SSadaf Ebrahimi     {
1285*6467f958SSadaf Ebrahimi         case CL_R:
1286*6467f958SSadaf Ebrahimi         case CL_A:
1287*6467f958SSadaf Ebrahimi         case CL_Rx:
1288*6467f958SSadaf Ebrahimi         case CL_INTENSITY:
1289*6467f958SSadaf Ebrahimi         case CL_LUMINANCE:
1290*6467f958SSadaf Ebrahimi         case CL_DEPTH: chanCount = 1; break;
1291*6467f958SSadaf Ebrahimi         case CL_RG:
1292*6467f958SSadaf Ebrahimi         case CL_RA:
1293*6467f958SSadaf Ebrahimi         case CL_RGx: chanCount = 2; break;
1294*6467f958SSadaf Ebrahimi         case CL_RGB:
1295*6467f958SSadaf Ebrahimi         case CL_RGBx:
1296*6467f958SSadaf Ebrahimi         case CL_sRGB:
1297*6467f958SSadaf Ebrahimi         case CL_sRGBx: chanCount = 3; break;
1298*6467f958SSadaf Ebrahimi         case CL_RGBA:
1299*6467f958SSadaf Ebrahimi         case CL_ARGB:
1300*6467f958SSadaf Ebrahimi         case CL_BGRA:
1301*6467f958SSadaf Ebrahimi         case CL_sBGRA:
1302*6467f958SSadaf Ebrahimi         case CL_sRGBA:
1303*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
1304*6467f958SSadaf Ebrahimi         case CL_1RGB_APPLE:
1305*6467f958SSadaf Ebrahimi #endif
1306*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
1307*6467f958SSadaf Ebrahimi         case CL_BGR1_APPLE:
1308*6467f958SSadaf Ebrahimi #endif
1309*6467f958SSadaf Ebrahimi             chanCount = 4;
1310*6467f958SSadaf Ebrahimi             break;
1311*6467f958SSadaf Ebrahimi         default:
1312*6467f958SSadaf Ebrahimi             log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
1313*6467f958SSadaf Ebrahimi             abort();
1314*6467f958SSadaf Ebrahimi             break;
1315*6467f958SSadaf Ebrahimi     }
1316*6467f958SSadaf Ebrahimi 
1317*6467f958SSadaf Ebrahimi     switch (fmt->image_channel_data_type)
1318*6467f958SSadaf Ebrahimi     {
1319*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_565:
1320*6467f958SSadaf Ebrahimi         case CL_UNORM_SHORT_555: return 2;
1321*6467f958SSadaf Ebrahimi 
1322*6467f958SSadaf Ebrahimi         case CL_UNORM_INT_101010: return 4;
1323*6467f958SSadaf Ebrahimi 
1324*6467f958SSadaf Ebrahimi         case CL_SNORM_INT8:
1325*6467f958SSadaf Ebrahimi         case CL_UNORM_INT8:
1326*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT8:
1327*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT8: return chanCount;
1328*6467f958SSadaf Ebrahimi 
1329*6467f958SSadaf Ebrahimi         case CL_SNORM_INT16:
1330*6467f958SSadaf Ebrahimi         case CL_UNORM_INT16:
1331*6467f958SSadaf Ebrahimi         case CL_HALF_FLOAT:
1332*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT16:
1333*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT16:
1334*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
1335*6467f958SSadaf Ebrahimi         case CL_SFIXED14_APPLE:
1336*6467f958SSadaf Ebrahimi #endif
1337*6467f958SSadaf Ebrahimi             return chanCount * 2;
1338*6467f958SSadaf Ebrahimi 
1339*6467f958SSadaf Ebrahimi         case CL_SIGNED_INT32:
1340*6467f958SSadaf Ebrahimi         case CL_UNSIGNED_INT32:
1341*6467f958SSadaf Ebrahimi         case CL_FLOAT: return chanCount * 4;
1342*6467f958SSadaf Ebrahimi 
1343*6467f958SSadaf Ebrahimi         default:
1344*6467f958SSadaf Ebrahimi             log_error("Unknown channel data type at %s:%d!\n", __FILE__,
1345*6467f958SSadaf Ebrahimi                       __LINE__);
1346*6467f958SSadaf Ebrahimi             abort();
1347*6467f958SSadaf Ebrahimi     }
1348*6467f958SSadaf Ebrahimi 
1349*6467f958SSadaf Ebrahimi     return 0;
1350*6467f958SSadaf Ebrahimi }
1351*6467f958SSadaf Ebrahimi 
verifyImageSupport(cl_device_id device)1352*6467f958SSadaf Ebrahimi test_status verifyImageSupport(cl_device_id device)
1353*6467f958SSadaf Ebrahimi {
1354*6467f958SSadaf Ebrahimi     int result = checkForImageSupport(device);
1355*6467f958SSadaf Ebrahimi     if (result == 0)
1356*6467f958SSadaf Ebrahimi     {
1357*6467f958SSadaf Ebrahimi         return TEST_PASS;
1358*6467f958SSadaf Ebrahimi     }
1359*6467f958SSadaf Ebrahimi     if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
1360*6467f958SSadaf Ebrahimi     {
1361*6467f958SSadaf Ebrahimi         log_error("SKIPPED: Device does not supported images as required by "
1362*6467f958SSadaf Ebrahimi                   "this test!\n");
1363*6467f958SSadaf Ebrahimi         return TEST_SKIP;
1364*6467f958SSadaf Ebrahimi     }
1365*6467f958SSadaf Ebrahimi     return TEST_FAIL;
1366*6467f958SSadaf Ebrahimi }
1367*6467f958SSadaf Ebrahimi 
checkForImageSupport(cl_device_id device)1368*6467f958SSadaf Ebrahimi int checkForImageSupport(cl_device_id device)
1369*6467f958SSadaf Ebrahimi {
1370*6467f958SSadaf Ebrahimi     cl_uint i;
1371*6467f958SSadaf Ebrahimi     int error;
1372*6467f958SSadaf Ebrahimi 
1373*6467f958SSadaf Ebrahimi 
1374*6467f958SSadaf Ebrahimi     /* Check the device props to see if images are supported at all first */
1375*6467f958SSadaf Ebrahimi     error =
1376*6467f958SSadaf Ebrahimi         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1377*6467f958SSadaf Ebrahimi     test_error(error, "Unable to query device for image support");
1378*6467f958SSadaf Ebrahimi     if (i == 0)
1379*6467f958SSadaf Ebrahimi     {
1380*6467f958SSadaf Ebrahimi         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1381*6467f958SSadaf Ebrahimi     }
1382*6467f958SSadaf Ebrahimi 
1383*6467f958SSadaf Ebrahimi     /* So our support is good */
1384*6467f958SSadaf Ebrahimi     return 0;
1385*6467f958SSadaf Ebrahimi }
1386*6467f958SSadaf Ebrahimi 
checkFor3DImageSupport(cl_device_id device)1387*6467f958SSadaf Ebrahimi int checkFor3DImageSupport(cl_device_id device)
1388*6467f958SSadaf Ebrahimi {
1389*6467f958SSadaf Ebrahimi     cl_uint i;
1390*6467f958SSadaf Ebrahimi     int error;
1391*6467f958SSadaf Ebrahimi 
1392*6467f958SSadaf Ebrahimi     /* Check the device props to see if images are supported at all first */
1393*6467f958SSadaf Ebrahimi     error =
1394*6467f958SSadaf Ebrahimi         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1395*6467f958SSadaf Ebrahimi     test_error(error, "Unable to query device for image support");
1396*6467f958SSadaf Ebrahimi     if (i == 0)
1397*6467f958SSadaf Ebrahimi     {
1398*6467f958SSadaf Ebrahimi         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1399*6467f958SSadaf Ebrahimi     }
1400*6467f958SSadaf Ebrahimi 
1401*6467f958SSadaf Ebrahimi     char profile[128];
1402*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
1403*6467f958SSadaf Ebrahimi                             NULL);
1404*6467f958SSadaf Ebrahimi     test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
1405*6467f958SSadaf Ebrahimi     if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
1406*6467f958SSadaf Ebrahimi     {
1407*6467f958SSadaf Ebrahimi         size_t width = -1L;
1408*6467f958SSadaf Ebrahimi         size_t height = -1L;
1409*6467f958SSadaf Ebrahimi         size_t depth = -1L;
1410*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
1411*6467f958SSadaf Ebrahimi                                 sizeof(width), &width, NULL);
1412*6467f958SSadaf Ebrahimi         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
1413*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
1414*6467f958SSadaf Ebrahimi                                 sizeof(height), &height, NULL);
1415*6467f958SSadaf Ebrahimi         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
1416*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
1417*6467f958SSadaf Ebrahimi                                 sizeof(depth), &depth, NULL);
1418*6467f958SSadaf Ebrahimi         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
1419*6467f958SSadaf Ebrahimi 
1420*6467f958SSadaf Ebrahimi         if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1421*6467f958SSadaf Ebrahimi     }
1422*6467f958SSadaf Ebrahimi 
1423*6467f958SSadaf Ebrahimi     /* So our support is good */
1424*6467f958SSadaf Ebrahimi     return 0;
1425*6467f958SSadaf Ebrahimi }
1426*6467f958SSadaf Ebrahimi 
checkForReadWriteImageSupport(cl_device_id device)1427*6467f958SSadaf Ebrahimi int checkForReadWriteImageSupport(cl_device_id device)
1428*6467f958SSadaf Ebrahimi {
1429*6467f958SSadaf Ebrahimi     if (checkForImageSupport(device))
1430*6467f958SSadaf Ebrahimi     {
1431*6467f958SSadaf Ebrahimi         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1432*6467f958SSadaf Ebrahimi     }
1433*6467f958SSadaf Ebrahimi 
1434*6467f958SSadaf Ebrahimi     auto device_cl_version = get_device_cl_version(device);
1435*6467f958SSadaf Ebrahimi     if (device_cl_version >= Version(3, 0))
1436*6467f958SSadaf Ebrahimi     {
1437*6467f958SSadaf Ebrahimi         // In OpenCL 3.0, Read-Write images are optional.
1438*6467f958SSadaf Ebrahimi         // Check if they are supported.
1439*6467f958SSadaf Ebrahimi         cl_uint are_rw_images_supported{};
1440*6467f958SSadaf Ebrahimi         test_error(
1441*6467f958SSadaf Ebrahimi             clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
1442*6467f958SSadaf Ebrahimi                             sizeof(are_rw_images_supported),
1443*6467f958SSadaf Ebrahimi                             &are_rw_images_supported, nullptr),
1444*6467f958SSadaf Ebrahimi             "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
1445*6467f958SSadaf Ebrahimi         if (0 == are_rw_images_supported)
1446*6467f958SSadaf Ebrahimi         {
1447*6467f958SSadaf Ebrahimi             log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
1448*6467f958SSadaf Ebrahimi             return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1449*6467f958SSadaf Ebrahimi         }
1450*6467f958SSadaf Ebrahimi     }
1451*6467f958SSadaf Ebrahimi     // READ_WRITE images are not supported on 1.X devices.
1452*6467f958SSadaf Ebrahimi     else if (device_cl_version < Version(2, 0))
1453*6467f958SSadaf Ebrahimi     {
1454*6467f958SSadaf Ebrahimi         log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
1455*6467f958SSadaf Ebrahimi         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1456*6467f958SSadaf Ebrahimi     }
1457*6467f958SSadaf Ebrahimi     // Support for read-write image arguments is required
1458*6467f958SSadaf Ebrahimi     // for an 2.X device if the device supports images.
1459*6467f958SSadaf Ebrahimi 
1460*6467f958SSadaf Ebrahimi     /* So our support is good */
1461*6467f958SSadaf Ebrahimi     return 0;
1462*6467f958SSadaf Ebrahimi }
1463*6467f958SSadaf Ebrahimi 
get_min_alignment(cl_context context)1464*6467f958SSadaf Ebrahimi size_t get_min_alignment(cl_context context)
1465*6467f958SSadaf Ebrahimi {
1466*6467f958SSadaf Ebrahimi     static cl_uint align_size = 0;
1467*6467f958SSadaf Ebrahimi 
1468*6467f958SSadaf Ebrahimi     if (0 == align_size)
1469*6467f958SSadaf Ebrahimi     {
1470*6467f958SSadaf Ebrahimi         cl_device_id *devices;
1471*6467f958SSadaf Ebrahimi         size_t devices_size = 0;
1472*6467f958SSadaf Ebrahimi         cl_uint result = 0;
1473*6467f958SSadaf Ebrahimi         cl_int error;
1474*6467f958SSadaf Ebrahimi         int i;
1475*6467f958SSadaf Ebrahimi 
1476*6467f958SSadaf Ebrahimi         error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1477*6467f958SSadaf Ebrahimi                                  &devices_size);
1478*6467f958SSadaf Ebrahimi         test_error_ret(error, "clGetContextInfo failed", 0);
1479*6467f958SSadaf Ebrahimi 
1480*6467f958SSadaf Ebrahimi         devices = (cl_device_id *)malloc(devices_size);
1481*6467f958SSadaf Ebrahimi         if (devices == NULL)
1482*6467f958SSadaf Ebrahimi         {
1483*6467f958SSadaf Ebrahimi             print_error(error, "malloc failed");
1484*6467f958SSadaf Ebrahimi             return 0;
1485*6467f958SSadaf Ebrahimi         }
1486*6467f958SSadaf Ebrahimi 
1487*6467f958SSadaf Ebrahimi         error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
1488*6467f958SSadaf Ebrahimi                                  (void *)devices, NULL);
1489*6467f958SSadaf Ebrahimi         test_error_ret(error, "clGetContextInfo failed", 0);
1490*6467f958SSadaf Ebrahimi 
1491*6467f958SSadaf Ebrahimi         for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
1492*6467f958SSadaf Ebrahimi         {
1493*6467f958SSadaf Ebrahimi             cl_uint alignment = 0;
1494*6467f958SSadaf Ebrahimi 
1495*6467f958SSadaf Ebrahimi             error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1496*6467f958SSadaf Ebrahimi                                     sizeof(cl_uint), (void *)&alignment, NULL);
1497*6467f958SSadaf Ebrahimi 
1498*6467f958SSadaf Ebrahimi             if (error == CL_SUCCESS)
1499*6467f958SSadaf Ebrahimi             {
1500*6467f958SSadaf Ebrahimi                 alignment >>= 3; // convert bits to bytes
1501*6467f958SSadaf Ebrahimi                 result = (alignment > result) ? alignment : result;
1502*6467f958SSadaf Ebrahimi             }
1503*6467f958SSadaf Ebrahimi             else
1504*6467f958SSadaf Ebrahimi                 print_error(error, "clGetDeviceInfo failed");
1505*6467f958SSadaf Ebrahimi         }
1506*6467f958SSadaf Ebrahimi 
1507*6467f958SSadaf Ebrahimi         align_size = result;
1508*6467f958SSadaf Ebrahimi         free(devices);
1509*6467f958SSadaf Ebrahimi     }
1510*6467f958SSadaf Ebrahimi 
1511*6467f958SSadaf Ebrahimi     return align_size;
1512*6467f958SSadaf Ebrahimi }
1513*6467f958SSadaf Ebrahimi 
get_default_rounding_mode(cl_device_id device,const cl_uint & param)1514*6467f958SSadaf Ebrahimi cl_device_fp_config get_default_rounding_mode(cl_device_id device,
1515*6467f958SSadaf Ebrahimi                                               const cl_uint &param)
1516*6467f958SSadaf Ebrahimi {
1517*6467f958SSadaf Ebrahimi     if (param == CL_DEVICE_DOUBLE_FP_CONFIG)
1518*6467f958SSadaf Ebrahimi         test_error_ret(
1519*6467f958SSadaf Ebrahimi             -1,
1520*6467f958SSadaf Ebrahimi             "FAILURE: CL_DEVICE_DOUBLE_FP_CONFIG not supported by this routine",
1521*6467f958SSadaf Ebrahimi             0);
1522*6467f958SSadaf Ebrahimi 
1523*6467f958SSadaf Ebrahimi     char profileStr[128] = "";
1524*6467f958SSadaf Ebrahimi     cl_device_fp_config single = 0;
1525*6467f958SSadaf Ebrahimi     int error = clGetDeviceInfo(device, param, sizeof(single), &single, NULL);
1526*6467f958SSadaf Ebrahimi     if (error)
1527*6467f958SSadaf Ebrahimi     {
1528*6467f958SSadaf Ebrahimi         std::string message = std::string("Unable to get device ")
1529*6467f958SSadaf Ebrahimi             + std::string(param == CL_DEVICE_HALF_FP_CONFIG
1530*6467f958SSadaf Ebrahimi                               ? "CL_DEVICE_HALF_FP_CONFIG"
1531*6467f958SSadaf Ebrahimi                               : "CL_DEVICE_SINGLE_FP_CONFIG");
1532*6467f958SSadaf Ebrahimi         test_error_ret(error, message.c_str(), 0);
1533*6467f958SSadaf Ebrahimi     }
1534*6467f958SSadaf Ebrahimi 
1535*6467f958SSadaf Ebrahimi     if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
1536*6467f958SSadaf Ebrahimi 
1537*6467f958SSadaf Ebrahimi     if (0 == (single & CL_FP_ROUND_TO_ZERO))
1538*6467f958SSadaf Ebrahimi         test_error_ret(-1,
1539*6467f958SSadaf Ebrahimi                        "FAILURE: device must support either "
1540*6467f958SSadaf Ebrahimi                        "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
1541*6467f958SSadaf Ebrahimi                        0);
1542*6467f958SSadaf Ebrahimi 
1543*6467f958SSadaf Ebrahimi     // Make sure we are an embedded device before allowing a pass
1544*6467f958SSadaf Ebrahimi     if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
1545*6467f958SSadaf Ebrahimi                                  &profileStr, NULL)))
1546*6467f958SSadaf Ebrahimi         test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
1547*6467f958SSadaf Ebrahimi 
1548*6467f958SSadaf Ebrahimi     if (strcmp(profileStr, "EMBEDDED_PROFILE"))
1549*6467f958SSadaf Ebrahimi         test_error_ret(error,
1550*6467f958SSadaf Ebrahimi                        "FAILURE: non-EMBEDDED_PROFILE devices must support "
1551*6467f958SSadaf Ebrahimi                        "CL_FP_ROUND_TO_NEAREST",
1552*6467f958SSadaf Ebrahimi                        0);
1553*6467f958SSadaf Ebrahimi 
1554*6467f958SSadaf Ebrahimi     return CL_FP_ROUND_TO_ZERO;
1555*6467f958SSadaf Ebrahimi }
1556*6467f958SSadaf Ebrahimi 
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1557*6467f958SSadaf Ebrahimi int checkDeviceForQueueSupport(cl_device_id device,
1558*6467f958SSadaf Ebrahimi                                cl_command_queue_properties prop)
1559*6467f958SSadaf Ebrahimi {
1560*6467f958SSadaf Ebrahimi     cl_command_queue_properties realProps;
1561*6467f958SSadaf Ebrahimi     cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
1562*6467f958SSadaf Ebrahimi                                    sizeof(realProps), &realProps, NULL);
1563*6467f958SSadaf Ebrahimi     test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
1564*6467f958SSadaf Ebrahimi 
1565*6467f958SSadaf Ebrahimi     return (realProps & prop) ? 1 : 0;
1566*6467f958SSadaf Ebrahimi }
1567*6467f958SSadaf Ebrahimi 
printDeviceHeader(cl_device_id device)1568*6467f958SSadaf Ebrahimi int printDeviceHeader(cl_device_id device)
1569*6467f958SSadaf Ebrahimi {
1570*6467f958SSadaf Ebrahimi     char deviceName[512], deviceVendor[512], deviceVersion[512],
1571*6467f958SSadaf Ebrahimi         cLangVersion[512];
1572*6467f958SSadaf Ebrahimi     int error;
1573*6467f958SSadaf Ebrahimi 
1574*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
1575*6467f958SSadaf Ebrahimi                             deviceName, NULL);
1576*6467f958SSadaf Ebrahimi     test_error(error, "Unable to get CL_DEVICE_NAME for device");
1577*6467f958SSadaf Ebrahimi 
1578*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
1579*6467f958SSadaf Ebrahimi                             deviceVendor, NULL);
1580*6467f958SSadaf Ebrahimi     test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
1581*6467f958SSadaf Ebrahimi 
1582*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
1583*6467f958SSadaf Ebrahimi                             deviceVersion, NULL);
1584*6467f958SSadaf Ebrahimi     test_error(error, "Unable to get CL_DEVICE_VERSION for device");
1585*6467f958SSadaf Ebrahimi 
1586*6467f958SSadaf Ebrahimi     error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1587*6467f958SSadaf Ebrahimi                             sizeof(cLangVersion), cLangVersion, NULL);
1588*6467f958SSadaf Ebrahimi     test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
1589*6467f958SSadaf Ebrahimi 
1590*6467f958SSadaf Ebrahimi     log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
1591*6467f958SSadaf Ebrahimi              "Device Version = %s%s%s\n",
1592*6467f958SSadaf Ebrahimi              deviceName, deviceVendor, deviceVersion,
1593*6467f958SSadaf Ebrahimi              (error == CL_SUCCESS) ? ", CL C Version = " : "",
1594*6467f958SSadaf Ebrahimi              (error == CL_SUCCESS) ? cLangVersion : "");
1595*6467f958SSadaf Ebrahimi 
1596*6467f958SSadaf Ebrahimi     auto version = get_device_cl_version(device);
1597*6467f958SSadaf Ebrahimi     if (version >= Version(3, 0))
1598*6467f958SSadaf Ebrahimi     {
1599*6467f958SSadaf Ebrahimi         auto ctsVersion = get_device_info_string(
1600*6467f958SSadaf Ebrahimi             device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
1601*6467f958SSadaf Ebrahimi         log_info("Device latest conformance version passed: %s\n",
1602*6467f958SSadaf Ebrahimi                  ctsVersion.c_str());
1603*6467f958SSadaf Ebrahimi     }
1604*6467f958SSadaf Ebrahimi 
1605*6467f958SSadaf Ebrahimi     return CL_SUCCESS;
1606*6467f958SSadaf Ebrahimi }
1607*6467f958SSadaf Ebrahimi 
get_device_cl_c_version(cl_device_id device)1608*6467f958SSadaf Ebrahimi Version get_device_cl_c_version(cl_device_id device)
1609*6467f958SSadaf Ebrahimi {
1610*6467f958SSadaf Ebrahimi     auto device_cl_version = get_device_cl_version(device);
1611*6467f958SSadaf Ebrahimi 
1612*6467f958SSadaf Ebrahimi     // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
1613*6467f958SSadaf Ebrahimi     // did not exist, but since this is just the first version we can
1614*6467f958SSadaf Ebrahimi     // return 1.0.
1615*6467f958SSadaf Ebrahimi     if (device_cl_version == Version{ 1, 0 })
1616*6467f958SSadaf Ebrahimi     {
1617*6467f958SSadaf Ebrahimi         return Version{ 1, 0 };
1618*6467f958SSadaf Ebrahimi     }
1619*6467f958SSadaf Ebrahimi 
1620*6467f958SSadaf Ebrahimi     // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
1621*6467f958SSadaf Ebrahimi     // versions are backwards compatible, hence querying with the
1622*6467f958SSadaf Ebrahimi     // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
1623*6467f958SSadaf Ebrahimi     // OpenCL C version.
1624*6467f958SSadaf Ebrahimi     size_t opencl_c_version_size_in_bytes{};
1625*6467f958SSadaf Ebrahimi     auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
1626*6467f958SSadaf Ebrahimi                                  &opencl_c_version_size_in_bytes);
1627*6467f958SSadaf Ebrahimi     test_error_ret(error,
1628*6467f958SSadaf Ebrahimi                    "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1629*6467f958SSadaf Ebrahimi                    (Version{ -1, 0 }));
1630*6467f958SSadaf Ebrahimi 
1631*6467f958SSadaf Ebrahimi     std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
1632*6467f958SSadaf Ebrahimi     error =
1633*6467f958SSadaf Ebrahimi         clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1634*6467f958SSadaf Ebrahimi                         opencl_c_version.size(), &opencl_c_version[0], nullptr);
1635*6467f958SSadaf Ebrahimi 
1636*6467f958SSadaf Ebrahimi     test_error_ret(error,
1637*6467f958SSadaf Ebrahimi                    "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1638*6467f958SSadaf Ebrahimi                    (Version{ -1, 0 }));
1639*6467f958SSadaf Ebrahimi 
1640*6467f958SSadaf Ebrahimi     // Scrape out the major, minor pair from the string.
1641*6467f958SSadaf Ebrahimi     auto major = opencl_c_version[opencl_c_version.find('.') - 1];
1642*6467f958SSadaf Ebrahimi     auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
1643*6467f958SSadaf Ebrahimi 
1644*6467f958SSadaf Ebrahimi     return Version{ major - '0', minor - '0' };
1645*6467f958SSadaf Ebrahimi }
1646*6467f958SSadaf Ebrahimi 
get_device_latest_cl_c_version(cl_device_id device)1647*6467f958SSadaf Ebrahimi Version get_device_latest_cl_c_version(cl_device_id device)
1648*6467f958SSadaf Ebrahimi {
1649*6467f958SSadaf Ebrahimi     auto device_cl_version = get_device_cl_version(device);
1650*6467f958SSadaf Ebrahimi 
1651*6467f958SSadaf Ebrahimi     // If the device version >= 3.0 it must support the
1652*6467f958SSadaf Ebrahimi     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
1653*6467f958SSadaf Ebrahimi     // recent CL C version supported by the device.
1654*6467f958SSadaf Ebrahimi     if (device_cl_version >= Version{ 3, 0 })
1655*6467f958SSadaf Ebrahimi     {
1656*6467f958SSadaf Ebrahimi         size_t opencl_c_all_versions_size_in_bytes{};
1657*6467f958SSadaf Ebrahimi         auto error =
1658*6467f958SSadaf Ebrahimi             clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1659*6467f958SSadaf Ebrahimi                             &opencl_c_all_versions_size_in_bytes);
1660*6467f958SSadaf Ebrahimi         test_error_ret(
1661*6467f958SSadaf Ebrahimi             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1662*6467f958SSadaf Ebrahimi             (Version{ -1, 0 }));
1663*6467f958SSadaf Ebrahimi         std::vector<cl_name_version> name_versions(
1664*6467f958SSadaf Ebrahimi             opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1665*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1666*6467f958SSadaf Ebrahimi                                 opencl_c_all_versions_size_in_bytes,
1667*6467f958SSadaf Ebrahimi                                 name_versions.data(), nullptr);
1668*6467f958SSadaf Ebrahimi         test_error_ret(
1669*6467f958SSadaf Ebrahimi             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1670*6467f958SSadaf Ebrahimi             (Version{ -1, 0 }));
1671*6467f958SSadaf Ebrahimi 
1672*6467f958SSadaf Ebrahimi         Version max_supported_cl_c_version{};
1673*6467f958SSadaf Ebrahimi         for (const auto &name_version : name_versions)
1674*6467f958SSadaf Ebrahimi         {
1675*6467f958SSadaf Ebrahimi             Version current_version{
1676*6467f958SSadaf Ebrahimi                 static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1677*6467f958SSadaf Ebrahimi                 static_cast<int>(CL_VERSION_MINOR(name_version.version))
1678*6467f958SSadaf Ebrahimi             };
1679*6467f958SSadaf Ebrahimi             max_supported_cl_c_version =
1680*6467f958SSadaf Ebrahimi                 (current_version > max_supported_cl_c_version)
1681*6467f958SSadaf Ebrahimi                 ? current_version
1682*6467f958SSadaf Ebrahimi                 : max_supported_cl_c_version;
1683*6467f958SSadaf Ebrahimi         }
1684*6467f958SSadaf Ebrahimi         return max_supported_cl_c_version;
1685*6467f958SSadaf Ebrahimi     }
1686*6467f958SSadaf Ebrahimi 
1687*6467f958SSadaf Ebrahimi     return get_device_cl_c_version(device);
1688*6467f958SSadaf Ebrahimi }
1689*6467f958SSadaf Ebrahimi 
get_max_OpenCL_C_for_context(cl_context context)1690*6467f958SSadaf Ebrahimi Version get_max_OpenCL_C_for_context(cl_context context)
1691*6467f958SSadaf Ebrahimi {
1692*6467f958SSadaf Ebrahimi     // Get all the devices in the context and find the maximum
1693*6467f958SSadaf Ebrahimi     // universally supported OpenCL C version.
1694*6467f958SSadaf Ebrahimi     size_t devices_size_in_bytes{};
1695*6467f958SSadaf Ebrahimi     auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
1696*6467f958SSadaf Ebrahimi                                   &devices_size_in_bytes);
1697*6467f958SSadaf Ebrahimi     test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
1698*6467f958SSadaf Ebrahimi                    (Version{ -1, 0 }));
1699*6467f958SSadaf Ebrahimi     std::vector<cl_device_id> devices(devices_size_in_bytes
1700*6467f958SSadaf Ebrahimi                                       / sizeof(cl_device_id));
1701*6467f958SSadaf Ebrahimi     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
1702*6467f958SSadaf Ebrahimi                              devices.data(), nullptr);
1703*6467f958SSadaf Ebrahimi     auto current_version = get_device_latest_cl_c_version(devices[0]);
1704*6467f958SSadaf Ebrahimi     std::for_each(std::next(devices.begin()), devices.end(),
1705*6467f958SSadaf Ebrahimi                   [&current_version](cl_device_id device) {
1706*6467f958SSadaf Ebrahimi                       auto device_version =
1707*6467f958SSadaf Ebrahimi                           get_device_latest_cl_c_version(device);
1708*6467f958SSadaf Ebrahimi                       // OpenCL 3.0 is not backwards compatible with 2.0.
1709*6467f958SSadaf Ebrahimi                       // If we have 3.0 and 2.0 in the same driver we
1710*6467f958SSadaf Ebrahimi                       // use 1.2.
1711*6467f958SSadaf Ebrahimi                       if (((device_version >= Version(2, 0)
1712*6467f958SSadaf Ebrahimi                             && device_version < Version(3, 0))
1713*6467f958SSadaf Ebrahimi                            && current_version >= Version(3, 0))
1714*6467f958SSadaf Ebrahimi                           || (device_version >= Version(3, 0)
1715*6467f958SSadaf Ebrahimi                               && (current_version >= Version(2, 0)
1716*6467f958SSadaf Ebrahimi                                   && current_version < Version(3, 0))))
1717*6467f958SSadaf Ebrahimi                       {
1718*6467f958SSadaf Ebrahimi                           current_version = Version(1, 2);
1719*6467f958SSadaf Ebrahimi                       }
1720*6467f958SSadaf Ebrahimi                       else
1721*6467f958SSadaf Ebrahimi                       {
1722*6467f958SSadaf Ebrahimi                           current_version =
1723*6467f958SSadaf Ebrahimi                               std::min(device_version, current_version);
1724*6467f958SSadaf Ebrahimi                       }
1725*6467f958SSadaf Ebrahimi                   });
1726*6467f958SSadaf Ebrahimi     return current_version;
1727*6467f958SSadaf Ebrahimi }
1728*6467f958SSadaf Ebrahimi 
device_supports_cl_c_version(cl_device_id device,Version version)1729*6467f958SSadaf Ebrahimi bool device_supports_cl_c_version(cl_device_id device, Version version)
1730*6467f958SSadaf Ebrahimi {
1731*6467f958SSadaf Ebrahimi     auto device_cl_version = get_device_cl_version(device);
1732*6467f958SSadaf Ebrahimi 
1733*6467f958SSadaf Ebrahimi     // In general, a device does not support an OpenCL C version if it is <=
1734*6467f958SSadaf Ebrahimi     // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
1735*6467f958SSadaf Ebrahimi     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
1736*6467f958SSadaf Ebrahimi 
1737*6467f958SSadaf Ebrahimi     // If the device version >= 3.0 it must support the
1738*6467f958SSadaf Ebrahimi     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
1739*6467f958SSadaf Ebrahimi     // used must appear in the query result if it's <=
1740*6467f958SSadaf Ebrahimi     // CL_DEVICE_OPENCL_C_VERSION.
1741*6467f958SSadaf Ebrahimi     if (device_cl_version >= Version{ 3, 0 })
1742*6467f958SSadaf Ebrahimi     {
1743*6467f958SSadaf Ebrahimi         size_t opencl_c_all_versions_size_in_bytes{};
1744*6467f958SSadaf Ebrahimi         auto error =
1745*6467f958SSadaf Ebrahimi             clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1746*6467f958SSadaf Ebrahimi                             &opencl_c_all_versions_size_in_bytes);
1747*6467f958SSadaf Ebrahimi         test_error_ret(
1748*6467f958SSadaf Ebrahimi             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1749*6467f958SSadaf Ebrahimi             (false));
1750*6467f958SSadaf Ebrahimi         std::vector<cl_name_version> name_versions(
1751*6467f958SSadaf Ebrahimi             opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1752*6467f958SSadaf Ebrahimi         error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1753*6467f958SSadaf Ebrahimi                                 opencl_c_all_versions_size_in_bytes,
1754*6467f958SSadaf Ebrahimi                                 name_versions.data(), nullptr);
1755*6467f958SSadaf Ebrahimi         test_error_ret(
1756*6467f958SSadaf Ebrahimi             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1757*6467f958SSadaf Ebrahimi             (false));
1758*6467f958SSadaf Ebrahimi 
1759*6467f958SSadaf Ebrahimi         for (const auto &name_version : name_versions)
1760*6467f958SSadaf Ebrahimi         {
1761*6467f958SSadaf Ebrahimi             Version current_version{
1762*6467f958SSadaf Ebrahimi                 static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1763*6467f958SSadaf Ebrahimi                 static_cast<int>(CL_VERSION_MINOR(name_version.version))
1764*6467f958SSadaf Ebrahimi             };
1765*6467f958SSadaf Ebrahimi             if (current_version == version)
1766*6467f958SSadaf Ebrahimi             {
1767*6467f958SSadaf Ebrahimi                 return true;
1768*6467f958SSadaf Ebrahimi             }
1769*6467f958SSadaf Ebrahimi         }
1770*6467f958SSadaf Ebrahimi     }
1771*6467f958SSadaf Ebrahimi 
1772*6467f958SSadaf Ebrahimi     return version <= get_device_cl_c_version(device);
1773*6467f958SSadaf Ebrahimi }
1774*6467f958SSadaf Ebrahimi 
// Calls `fn` repeatedly until it returns true or the accumulated wait reaches
// `timeout_ms`, sleeping `interval_ms` milliseconds after each unsuccessful
// call. Elapsed time is counted as the sum of the intervals slept, not
// measured with a clock.
//
// Returns true as soon as `fn` returns true. Returns false if the budget is
// used up first, or if `timeout_ms` is 0 (in which case `fn` is never
// called).
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    for (unsigned waited_ms = 0; waited_ms < timeout_ms;
         waited_ms += interval_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(interval_ms * 1000);
    }

    return false;
}
1794*6467f958SSadaf Ebrahimi 
device_supports_double(cl_device_id device)1795*6467f958SSadaf Ebrahimi bool device_supports_double(cl_device_id device)
1796*6467f958SSadaf Ebrahimi {
1797*6467f958SSadaf Ebrahimi     if (is_extension_available(device, "cl_khr_fp64"))
1798*6467f958SSadaf Ebrahimi     {
1799*6467f958SSadaf Ebrahimi         return true;
1800*6467f958SSadaf Ebrahimi     }
1801*6467f958SSadaf Ebrahimi     else
1802*6467f958SSadaf Ebrahimi     {
1803*6467f958SSadaf Ebrahimi         cl_device_fp_config double_fp_config;
1804*6467f958SSadaf Ebrahimi         cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
1805*6467f958SSadaf Ebrahimi                                      sizeof(double_fp_config),
1806*6467f958SSadaf Ebrahimi                                      &double_fp_config, nullptr);
1807*6467f958SSadaf Ebrahimi         test_error(err,
1808*6467f958SSadaf Ebrahimi                    "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
1809*6467f958SSadaf Ebrahimi         return double_fp_config != 0;
1810*6467f958SSadaf Ebrahimi     }
1811*6467f958SSadaf Ebrahimi }
1812*6467f958SSadaf Ebrahimi 
device_supports_half(cl_device_id device)1813*6467f958SSadaf Ebrahimi bool device_supports_half(cl_device_id device)
1814*6467f958SSadaf Ebrahimi {
1815*6467f958SSadaf Ebrahimi     return is_extension_available(device, "cl_khr_fp16");
1816*6467f958SSadaf Ebrahimi }
1817