1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi // http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include "crc32.h"
17*6467f958SSadaf Ebrahimi #include "kernelHelpers.h"
18*6467f958SSadaf Ebrahimi #include "deviceInfo.h"
19*6467f958SSadaf Ebrahimi #include "errorHelpers.h"
20*6467f958SSadaf Ebrahimi #include "imageHelpers.h"
21*6467f958SSadaf Ebrahimi #include "typeWrappers.h"
22*6467f958SSadaf Ebrahimi #include "testHarness.h"
23*6467f958SSadaf Ebrahimi #include "parseParameters.h"
24*6467f958SSadaf Ebrahimi
25*6467f958SSadaf Ebrahimi #include <cassert>
26*6467f958SSadaf Ebrahimi #include <vector>
27*6467f958SSadaf Ebrahimi #include <string>
28*6467f958SSadaf Ebrahimi #include <fstream>
29*6467f958SSadaf Ebrahimi #include <sstream>
30*6467f958SSadaf Ebrahimi #include <iomanip>
31*6467f958SSadaf Ebrahimi #include <mutex>
32*6467f958SSadaf Ebrahimi #include <algorithm>
33*6467f958SSadaf Ebrahimi
34*6467f958SSadaf Ebrahimi #if defined(_WIN32)
35*6467f958SSadaf Ebrahimi std::string slash = "\\";
36*6467f958SSadaf Ebrahimi #else
37*6467f958SSadaf Ebrahimi std::string slash = "/";
38*6467f958SSadaf Ebrahimi #endif
39*6467f958SSadaf Ebrahimi
40*6467f958SSadaf Ebrahimi static std::mutex gCompilerMutex;
41*6467f958SSadaf Ebrahimi
42*6467f958SSadaf Ebrahimi static cl_int get_first_device_id(const cl_context context,
43*6467f958SSadaf Ebrahimi cl_device_id &device);
44*6467f958SSadaf Ebrahimi
// Returns the size in bytes of the file named `fileName`.
//
// Returns 0 when the file cannot be opened or when its length cannot be
// determined. Previously a failed tellg() (which yields -1) was passed through
// as a negative size, which callers that cast the result to size_t would see
// as a huge value.
long get_file_size(const std::string &fileName)
{
    std::ifstream ifs(fileName.c_str(), std::ios::binary);
    if (!ifs.good()) return 0;

    // Seek to the end; the resulting stream position is the file length.
    ifs.seekg(0, std::ios::end);
    const std::streamoff length = ifs.tellg();
    if (ifs.fail() || length < 0) return 0;

    return static_cast<long>(length);
}
54*6467f958SSadaf Ebrahimi
// Concatenates the first `numKernelLines` C strings of `kernelProgram` into a
// single std::string, in order and without inserting any separators.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string source;
    unsigned int remaining = numKernelLines;
    const char *const *line = kernelProgram;
    while (remaining != 0)
    {
        source.append(*line);
        ++line;
        --remaining;
    }
    return source;
}
67*6467f958SSadaf Ebrahimi
get_kernel_name(const std::string & source)68*6467f958SSadaf Ebrahimi std::string get_kernel_name(const std::string &source)
69*6467f958SSadaf Ebrahimi {
70*6467f958SSadaf Ebrahimi // Create list of kernel names
71*6467f958SSadaf Ebrahimi std::string kernelsList;
72*6467f958SSadaf Ebrahimi size_t kPos = source.find("kernel");
73*6467f958SSadaf Ebrahimi while (kPos != std::string::npos)
74*6467f958SSadaf Ebrahimi {
75*6467f958SSadaf Ebrahimi // check for '__kernel'
76*6467f958SSadaf Ebrahimi size_t pos = kPos;
77*6467f958SSadaf Ebrahimi if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
78*6467f958SSadaf Ebrahimi pos -= 2;
79*6467f958SSadaf Ebrahimi
80*6467f958SSadaf Ebrahimi // check character before 'kernel' (white space expected)
81*6467f958SSadaf Ebrahimi size_t wsPos = source.find_last_of(" \t\r\n", pos);
82*6467f958SSadaf Ebrahimi if (wsPos == std::string::npos || wsPos + 1 == pos)
83*6467f958SSadaf Ebrahimi {
84*6467f958SSadaf Ebrahimi // check character after 'kernel' (white space expected)
85*6467f958SSadaf Ebrahimi size_t akPos = kPos + sizeof("kernel") - 1;
86*6467f958SSadaf Ebrahimi wsPos = source.find_first_of(" \t\r\n", akPos);
87*6467f958SSadaf Ebrahimi if (!(wsPos == akPos))
88*6467f958SSadaf Ebrahimi {
89*6467f958SSadaf Ebrahimi kPos = source.find("kernel", kPos + 1);
90*6467f958SSadaf Ebrahimi continue;
91*6467f958SSadaf Ebrahimi }
92*6467f958SSadaf Ebrahimi
93*6467f958SSadaf Ebrahimi bool attributeFound;
94*6467f958SSadaf Ebrahimi do
95*6467f958SSadaf Ebrahimi {
96*6467f958SSadaf Ebrahimi attributeFound = false;
97*6467f958SSadaf Ebrahimi // find '(' after kernel name name
98*6467f958SSadaf Ebrahimi size_t pPos = source.find("(", akPos);
99*6467f958SSadaf Ebrahimi if (!(pPos != std::string::npos)) continue;
100*6467f958SSadaf Ebrahimi
101*6467f958SSadaf Ebrahimi // check for not empty kernel name before '('
102*6467f958SSadaf Ebrahimi pos = source.find_last_not_of(" \t\r\n", pPos - 1);
103*6467f958SSadaf Ebrahimi if (!(pos != std::string::npos && pos > akPos)) continue;
104*6467f958SSadaf Ebrahimi
105*6467f958SSadaf Ebrahimi // find character before kernel name
106*6467f958SSadaf Ebrahimi wsPos = source.find_last_of(" \t\r\n", pos);
107*6467f958SSadaf Ebrahimi if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
108*6467f958SSadaf Ebrahimi
109*6467f958SSadaf Ebrahimi std::string name =
110*6467f958SSadaf Ebrahimi source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
111*6467f958SSadaf Ebrahimi // check for kernel attribute
112*6467f958SSadaf Ebrahimi if (name == "__attribute__")
113*6467f958SSadaf Ebrahimi {
114*6467f958SSadaf Ebrahimi attributeFound = true;
115*6467f958SSadaf Ebrahimi int pCount = 1;
116*6467f958SSadaf Ebrahimi akPos = pPos + 1;
117*6467f958SSadaf Ebrahimi while (pCount > 0 && akPos != std::string::npos)
118*6467f958SSadaf Ebrahimi {
119*6467f958SSadaf Ebrahimi akPos = source.find_first_of("()", akPos + 1);
120*6467f958SSadaf Ebrahimi if (akPos != std::string::npos)
121*6467f958SSadaf Ebrahimi {
122*6467f958SSadaf Ebrahimi if (source[akPos] == '(')
123*6467f958SSadaf Ebrahimi pCount++;
124*6467f958SSadaf Ebrahimi else
125*6467f958SSadaf Ebrahimi pCount--;
126*6467f958SSadaf Ebrahimi }
127*6467f958SSadaf Ebrahimi }
128*6467f958SSadaf Ebrahimi }
129*6467f958SSadaf Ebrahimi else
130*6467f958SSadaf Ebrahimi {
131*6467f958SSadaf Ebrahimi kernelsList += name + ".";
132*6467f958SSadaf Ebrahimi }
133*6467f958SSadaf Ebrahimi } while (attributeFound);
134*6467f958SSadaf Ebrahimi }
135*6467f958SSadaf Ebrahimi kPos = source.find("kernel", kPos + 1);
136*6467f958SSadaf Ebrahimi }
137*6467f958SSadaf Ebrahimi std::ostringstream oss;
138*6467f958SSadaf Ebrahimi if (MAX_LEN_FOR_KERNEL_LIST > 0)
139*6467f958SSadaf Ebrahimi {
140*6467f958SSadaf Ebrahimi if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
141*6467f958SSadaf Ebrahimi {
142*6467f958SSadaf Ebrahimi kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
143*6467f958SSadaf Ebrahimi kernelsList[kernelsList.size() - 1] = '.';
144*6467f958SSadaf Ebrahimi kernelsList[kernelsList.size() - 1] = '.';
145*6467f958SSadaf Ebrahimi }
146*6467f958SSadaf Ebrahimi oss << kernelsList;
147*6467f958SSadaf Ebrahimi }
148*6467f958SSadaf Ebrahimi return oss.str();
149*6467f958SSadaf Ebrahimi }
150*6467f958SSadaf Ebrahimi
151*6467f958SSadaf Ebrahimi static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)152*6467f958SSadaf Ebrahimi get_offline_compilation_file_type_str(const CompilationMode compilationMode)
153*6467f958SSadaf Ebrahimi {
154*6467f958SSadaf Ebrahimi switch (compilationMode)
155*6467f958SSadaf Ebrahimi {
156*6467f958SSadaf Ebrahimi default: assert(0 && "Invalid compilation mode"); abort();
157*6467f958SSadaf Ebrahimi case kOnline:
158*6467f958SSadaf Ebrahimi assert(0 && "Invalid compilation mode for offline compilation");
159*6467f958SSadaf Ebrahimi abort();
160*6467f958SSadaf Ebrahimi case kBinary: return "binary";
161*6467f958SSadaf Ebrahimi case kSpir_v: return "SPIR-V";
162*6467f958SSadaf Ebrahimi }
163*6467f958SSadaf Ebrahimi }
164*6467f958SSadaf Ebrahimi
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165*6467f958SSadaf Ebrahimi static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166*6467f958SSadaf Ebrahimi const char *const *kernelProgram,
167*6467f958SSadaf Ebrahimi const char *buildOptions)
168*6467f958SSadaf Ebrahimi {
169*6467f958SSadaf Ebrahimi std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170*6467f958SSadaf Ebrahimi std::string kernelName = get_kernel_name(kernel);
171*6467f958SSadaf Ebrahimi cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172*6467f958SSadaf Ebrahimi std::ostringstream oss;
173*6467f958SSadaf Ebrahimi oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
174*6467f958SSadaf Ebrahimi << kernelCrc;
175*6467f958SSadaf Ebrahimi if (buildOptions)
176*6467f958SSadaf Ebrahimi {
177*6467f958SSadaf Ebrahimi cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
178*6467f958SSadaf Ebrahimi oss << '.' << std::hex << std::setfill('0') << std::setw(8)
179*6467f958SSadaf Ebrahimi << bOptionsCrc;
180*6467f958SSadaf Ebrahimi }
181*6467f958SSadaf Ebrahimi return oss.str();
182*6467f958SSadaf Ebrahimi }
183*6467f958SSadaf Ebrahimi
184*6467f958SSadaf Ebrahimi
185*6467f958SSadaf Ebrahimi static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)186*6467f958SSadaf Ebrahimi get_cl_build_options_filename_with_path(const std::string &filePath,
187*6467f958SSadaf Ebrahimi const std::string &fileNamePrefix)
188*6467f958SSadaf Ebrahimi {
189*6467f958SSadaf Ebrahimi return filePath + slash + fileNamePrefix + ".options";
190*6467f958SSadaf Ebrahimi }
191*6467f958SSadaf Ebrahimi
192*6467f958SSadaf Ebrahimi static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)193*6467f958SSadaf Ebrahimi get_cl_source_filename_with_path(const std::string &filePath,
194*6467f958SSadaf Ebrahimi const std::string &fileNamePrefix)
195*6467f958SSadaf Ebrahimi {
196*6467f958SSadaf Ebrahimi return filePath + slash + fileNamePrefix + ".cl";
197*6467f958SSadaf Ebrahimi }
198*6467f958SSadaf Ebrahimi
199*6467f958SSadaf Ebrahimi static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)200*6467f958SSadaf Ebrahimi get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
201*6467f958SSadaf Ebrahimi const std::string &filePath,
202*6467f958SSadaf Ebrahimi const std::string &fileNamePrefix)
203*6467f958SSadaf Ebrahimi {
204*6467f958SSadaf Ebrahimi std::string binaryFilename = filePath + slash + fileNamePrefix;
205*6467f958SSadaf Ebrahimi if (kSpir_v == mode)
206*6467f958SSadaf Ebrahimi {
207*6467f958SSadaf Ebrahimi std::ostringstream extension;
208*6467f958SSadaf Ebrahimi extension << ".spv" << deviceAddrSpaceSize;
209*6467f958SSadaf Ebrahimi binaryFilename += extension.str();
210*6467f958SSadaf Ebrahimi }
211*6467f958SSadaf Ebrahimi return binaryFilename;
212*6467f958SSadaf Ebrahimi }
213*6467f958SSadaf Ebrahimi
file_exist_on_disk(const std::string & filePath,const std::string & fileName)214*6467f958SSadaf Ebrahimi static bool file_exist_on_disk(const std::string &filePath,
215*6467f958SSadaf Ebrahimi const std::string &fileName)
216*6467f958SSadaf Ebrahimi {
217*6467f958SSadaf Ebrahimi std::string fileNameWithPath = filePath + slash + fileName;
218*6467f958SSadaf Ebrahimi bool exist = false;
219*6467f958SSadaf Ebrahimi std::ifstream ifs;
220*6467f958SSadaf Ebrahimi
221*6467f958SSadaf Ebrahimi ifs.open(fileNameWithPath.c_str(), std::ios::binary);
222*6467f958SSadaf Ebrahimi if (ifs.good()) exist = true;
223*6467f958SSadaf Ebrahimi ifs.close();
224*6467f958SSadaf Ebrahimi return exist;
225*6467f958SSadaf Ebrahimi }
226*6467f958SSadaf Ebrahimi
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)227*6467f958SSadaf Ebrahimi static bool should_save_kernel_source_to_disk(CompilationMode mode,
228*6467f958SSadaf Ebrahimi CompilationCacheMode cacheMode,
229*6467f958SSadaf Ebrahimi const std::string &binaryPath,
230*6467f958SSadaf Ebrahimi const std::string &binaryName)
231*6467f958SSadaf Ebrahimi {
232*6467f958SSadaf Ebrahimi bool saveToDisk = false;
233*6467f958SSadaf Ebrahimi if (cacheMode == kCacheModeDumpCl
234*6467f958SSadaf Ebrahimi || (cacheMode == kCacheModeOverwrite && mode != kOnline))
235*6467f958SSadaf Ebrahimi {
236*6467f958SSadaf Ebrahimi saveToDisk = true;
237*6467f958SSadaf Ebrahimi }
238*6467f958SSadaf Ebrahimi if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
239*6467f958SSadaf Ebrahimi {
240*6467f958SSadaf Ebrahimi saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
241*6467f958SSadaf Ebrahimi }
242*6467f958SSadaf Ebrahimi return saveToDisk;
243*6467f958SSadaf Ebrahimi }
244*6467f958SSadaf Ebrahimi
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)245*6467f958SSadaf Ebrahimi static int save_kernel_build_options_to_disk(const std::string &path,
246*6467f958SSadaf Ebrahimi const std::string &prefix,
247*6467f958SSadaf Ebrahimi const char *buildOptions)
248*6467f958SSadaf Ebrahimi {
249*6467f958SSadaf Ebrahimi std::string filename =
250*6467f958SSadaf Ebrahimi get_cl_build_options_filename_with_path(path, prefix);
251*6467f958SSadaf Ebrahimi std::ofstream ofs(filename.c_str(), std::ios::binary);
252*6467f958SSadaf Ebrahimi if (!ofs.good())
253*6467f958SSadaf Ebrahimi {
254*6467f958SSadaf Ebrahimi log_info("Can't save kernel build options: %s\n", filename.c_str());
255*6467f958SSadaf Ebrahimi return -1;
256*6467f958SSadaf Ebrahimi }
257*6467f958SSadaf Ebrahimi ofs.write(buildOptions, strlen(buildOptions));
258*6467f958SSadaf Ebrahimi ofs.close();
259*6467f958SSadaf Ebrahimi log_info("Saved kernel build options to file: %s\n", filename.c_str());
260*6467f958SSadaf Ebrahimi return CL_SUCCESS;
261*6467f958SSadaf Ebrahimi }
262*6467f958SSadaf Ebrahimi
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)263*6467f958SSadaf Ebrahimi static int save_kernel_source_to_disk(const std::string &path,
264*6467f958SSadaf Ebrahimi const std::string &prefix,
265*6467f958SSadaf Ebrahimi const std::string &source)
266*6467f958SSadaf Ebrahimi {
267*6467f958SSadaf Ebrahimi std::string filename = get_cl_source_filename_with_path(path, prefix);
268*6467f958SSadaf Ebrahimi std::ofstream ofs(filename.c_str(), std::ios::binary);
269*6467f958SSadaf Ebrahimi if (!ofs.good())
270*6467f958SSadaf Ebrahimi {
271*6467f958SSadaf Ebrahimi log_info("Can't save kernel source: %s\n", filename.c_str());
272*6467f958SSadaf Ebrahimi return -1;
273*6467f958SSadaf Ebrahimi }
274*6467f958SSadaf Ebrahimi ofs.write(source.c_str(), source.size());
275*6467f958SSadaf Ebrahimi ofs.close();
276*6467f958SSadaf Ebrahimi log_info("Saved kernel source to file: %s\n", filename.c_str());
277*6467f958SSadaf Ebrahimi return CL_SUCCESS;
278*6467f958SSadaf Ebrahimi }
279*6467f958SSadaf Ebrahimi
280*6467f958SSadaf Ebrahimi static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)281*6467f958SSadaf Ebrahimi save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
282*6467f958SSadaf Ebrahimi const char *const *kernelProgram,
283*6467f958SSadaf Ebrahimi const char *buildOptions)
284*6467f958SSadaf Ebrahimi {
285*6467f958SSadaf Ebrahimi int error;
286*6467f958SSadaf Ebrahimi
287*6467f958SSadaf Ebrahimi std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
288*6467f958SSadaf Ebrahimi std::string kernelNamePrefix =
289*6467f958SSadaf Ebrahimi get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
290*6467f958SSadaf Ebrahimi
291*6467f958SSadaf Ebrahimi // save kernel source to disk
292*6467f958SSadaf Ebrahimi error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
293*6467f958SSadaf Ebrahimi kernel);
294*6467f958SSadaf Ebrahimi
295*6467f958SSadaf Ebrahimi // save kernel build options to disk if exists
296*6467f958SSadaf Ebrahimi if (buildOptions != NULL)
297*6467f958SSadaf Ebrahimi error |= save_kernel_build_options_to_disk(
298*6467f958SSadaf Ebrahimi gCompilationCachePath, kernelNamePrefix, buildOptions);
299*6467f958SSadaf Ebrahimi
300*6467f958SSadaf Ebrahimi return error;
301*6467f958SSadaf Ebrahimi }
302*6467f958SSadaf Ebrahimi
303*6467f958SSadaf Ebrahimi static std::string
get_compilation_mode_str(const CompilationMode compilationMode)304*6467f958SSadaf Ebrahimi get_compilation_mode_str(const CompilationMode compilationMode)
305*6467f958SSadaf Ebrahimi {
306*6467f958SSadaf Ebrahimi switch (compilationMode)
307*6467f958SSadaf Ebrahimi {
308*6467f958SSadaf Ebrahimi default: assert(0 && "Invalid compilation mode"); abort();
309*6467f958SSadaf Ebrahimi case kOnline: return "online";
310*6467f958SSadaf Ebrahimi case kBinary: return "binary";
311*6467f958SSadaf Ebrahimi case kSpir_v: return "spir-v";
312*6467f958SSadaf Ebrahimi }
313*6467f958SSadaf Ebrahimi }
314*6467f958SSadaf Ebrahimi
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)315*6467f958SSadaf Ebrahimi static cl_int get_cl_device_info_str(const cl_device_id device,
316*6467f958SSadaf Ebrahimi const cl_uint device_address_space_size,
317*6467f958SSadaf Ebrahimi const CompilationMode compilationMode,
318*6467f958SSadaf Ebrahimi std::string &clDeviceInfo)
319*6467f958SSadaf Ebrahimi {
320*6467f958SSadaf Ebrahimi std::string extensionsString = get_device_extensions_string(device);
321*6467f958SSadaf Ebrahimi std::string versionString = get_device_version_string(device);
322*6467f958SSadaf Ebrahimi
323*6467f958SSadaf Ebrahimi std::ostringstream clDeviceInfoStream;
324*6467f958SSadaf Ebrahimi std::string file_type =
325*6467f958SSadaf Ebrahimi get_offline_compilation_file_type_str(compilationMode);
326*6467f958SSadaf Ebrahimi clDeviceInfoStream << "# OpenCL device info affecting " << file_type
327*6467f958SSadaf Ebrahimi << " offline compilation:" << std::endl
328*6467f958SSadaf Ebrahimi << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
329*6467f958SSadaf Ebrahimi << std::endl
330*6467f958SSadaf Ebrahimi << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
331*6467f958SSadaf Ebrahimi << std::endl;
332*6467f958SSadaf Ebrahimi /* We only need the device's supported IL version(s) when compiling IL
333*6467f958SSadaf Ebrahimi * that will be loaded with clCreateProgramWithIL() */
334*6467f958SSadaf Ebrahimi if (compilationMode == kSpir_v)
335*6467f958SSadaf Ebrahimi {
336*6467f958SSadaf Ebrahimi std::string ilVersionString = get_device_il_version_string(device);
337*6467f958SSadaf Ebrahimi clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
338*6467f958SSadaf Ebrahimi << "\"" << std::endl;
339*6467f958SSadaf Ebrahimi }
340*6467f958SSadaf Ebrahimi clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
341*6467f958SSadaf Ebrahimi << std::endl;
342*6467f958SSadaf Ebrahimi clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
343*6467f958SSadaf Ebrahimi << (0 == checkForImageSupport(device)) << std::endl;
344*6467f958SSadaf Ebrahimi clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
345*6467f958SSadaf Ebrahimi << "\"" << std::endl;
346*6467f958SSadaf Ebrahimi
347*6467f958SSadaf Ebrahimi clDeviceInfo = clDeviceInfoStream.str();
348*6467f958SSadaf Ebrahimi
349*6467f958SSadaf Ebrahimi return CL_SUCCESS;
350*6467f958SSadaf Ebrahimi }
351*6467f958SSadaf Ebrahimi
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)352*6467f958SSadaf Ebrahimi static int write_cl_device_info(const cl_device_id device,
353*6467f958SSadaf Ebrahimi const cl_uint device_address_space_size,
354*6467f958SSadaf Ebrahimi const CompilationMode compilationMode,
355*6467f958SSadaf Ebrahimi std::string &clDeviceInfoFilename)
356*6467f958SSadaf Ebrahimi {
357*6467f958SSadaf Ebrahimi std::string clDeviceInfo;
358*6467f958SSadaf Ebrahimi int error = get_cl_device_info_str(device, device_address_space_size,
359*6467f958SSadaf Ebrahimi compilationMode, clDeviceInfo);
360*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
361*6467f958SSadaf Ebrahimi {
362*6467f958SSadaf Ebrahimi return error;
363*6467f958SSadaf Ebrahimi }
364*6467f958SSadaf Ebrahimi
365*6467f958SSadaf Ebrahimi cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
366*6467f958SSadaf Ebrahimi
367*6467f958SSadaf Ebrahimi /* Get the filename for the clDeviceInfo file.
368*6467f958SSadaf Ebrahimi * Note: the file includes the hash on its content, so it is usually
369*6467f958SSadaf Ebrahimi * unnecessary to delete it. */
370*6467f958SSadaf Ebrahimi std::ostringstream clDeviceInfoFilenameStream;
371*6467f958SSadaf Ebrahimi clDeviceInfoFilenameStream << gCompilationCachePath << slash
372*6467f958SSadaf Ebrahimi << "clDeviceInfo-";
373*6467f958SSadaf Ebrahimi clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
374*6467f958SSadaf Ebrahimi << crc << ".txt";
375*6467f958SSadaf Ebrahimi
376*6467f958SSadaf Ebrahimi clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
377*6467f958SSadaf Ebrahimi
378*6467f958SSadaf Ebrahimi if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
379*6467f958SSadaf Ebrahimi {
380*6467f958SSadaf Ebrahimi /* The CL device info file has already been created.
381*6467f958SSadaf Ebrahimi * Nothing to do. */
382*6467f958SSadaf Ebrahimi return 0;
383*6467f958SSadaf Ebrahimi }
384*6467f958SSadaf Ebrahimi
385*6467f958SSadaf Ebrahimi /* The file does not exist or its length is not as expected.
386*6467f958SSadaf Ebrahimi * Create/overwrite it. */
387*6467f958SSadaf Ebrahimi std::ofstream ofs(clDeviceInfoFilename);
388*6467f958SSadaf Ebrahimi if (!ofs.good())
389*6467f958SSadaf Ebrahimi {
390*6467f958SSadaf Ebrahimi log_info("OfflineCompiler: can't create CL device info file: %s\n",
391*6467f958SSadaf Ebrahimi clDeviceInfoFilename.c_str());
392*6467f958SSadaf Ebrahimi return -1;
393*6467f958SSadaf Ebrahimi }
394*6467f958SSadaf Ebrahimi ofs << clDeviceInfo;
395*6467f958SSadaf Ebrahimi ofs.close();
396*6467f958SSadaf Ebrahimi
397*6467f958SSadaf Ebrahimi return CL_SUCCESS;
398*6467f958SSadaf Ebrahimi }
399*6467f958SSadaf Ebrahimi
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)400*6467f958SSadaf Ebrahimi static std::string get_offline_compilation_command(
401*6467f958SSadaf Ebrahimi const cl_uint device_address_space_size,
402*6467f958SSadaf Ebrahimi const CompilationMode compilationMode, const std::string &bOptions,
403*6467f958SSadaf Ebrahimi const std::string &sourceFilename, const std::string &outputFilename,
404*6467f958SSadaf Ebrahimi const std::string &clDeviceInfoFilename)
405*6467f958SSadaf Ebrahimi {
406*6467f958SSadaf Ebrahimi std::ostringstream wrapperOptions;
407*6467f958SSadaf Ebrahimi
408*6467f958SSadaf Ebrahimi wrapperOptions << gCompilationProgram
409*6467f958SSadaf Ebrahimi << " --mode=" << get_compilation_mode_str(compilationMode)
410*6467f958SSadaf Ebrahimi << " --source=" << sourceFilename
411*6467f958SSadaf Ebrahimi << " --output=" << outputFilename
412*6467f958SSadaf Ebrahimi << " --cl-device-info=" << clDeviceInfoFilename;
413*6467f958SSadaf Ebrahimi
414*6467f958SSadaf Ebrahimi if (bOptions != "")
415*6467f958SSadaf Ebrahimi {
416*6467f958SSadaf Ebrahimi // Add build options passed to this function
417*6467f958SSadaf Ebrahimi wrapperOptions << " -- " << bOptions;
418*6467f958SSadaf Ebrahimi }
419*6467f958SSadaf Ebrahimi
420*6467f958SSadaf Ebrahimi return wrapperOptions.str();
421*6467f958SSadaf Ebrahimi }
422*6467f958SSadaf Ebrahimi
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename)423*6467f958SSadaf Ebrahimi static int invoke_offline_compiler(const cl_device_id device,
424*6467f958SSadaf Ebrahimi const cl_uint device_address_space_size,
425*6467f958SSadaf Ebrahimi const CompilationMode compilationMode,
426*6467f958SSadaf Ebrahimi const std::string &bOptions,
427*6467f958SSadaf Ebrahimi const std::string &sourceFilename,
428*6467f958SSadaf Ebrahimi const std::string &outputFilename)
429*6467f958SSadaf Ebrahimi {
430*6467f958SSadaf Ebrahimi std::string runString;
431*6467f958SSadaf Ebrahimi std::string clDeviceInfoFilename;
432*6467f958SSadaf Ebrahimi
433*6467f958SSadaf Ebrahimi // See cl_offline_compiler-interface.txt for a description of the
434*6467f958SSadaf Ebrahimi // format of the CL device information file generated below, and
435*6467f958SSadaf Ebrahimi // the internal command line interface for invoking the offline
436*6467f958SSadaf Ebrahimi // compiler.
437*6467f958SSadaf Ebrahimi
438*6467f958SSadaf Ebrahimi cl_int err = write_cl_device_info(device, device_address_space_size,
439*6467f958SSadaf Ebrahimi compilationMode, clDeviceInfoFilename);
440*6467f958SSadaf Ebrahimi if (err != CL_SUCCESS)
441*6467f958SSadaf Ebrahimi {
442*6467f958SSadaf Ebrahimi log_error("Failed writing CL device info file\n");
443*6467f958SSadaf Ebrahimi return err;
444*6467f958SSadaf Ebrahimi }
445*6467f958SSadaf Ebrahimi
446*6467f958SSadaf Ebrahimi runString = get_offline_compilation_command(
447*6467f958SSadaf Ebrahimi device_address_space_size, compilationMode, bOptions, sourceFilename,
448*6467f958SSadaf Ebrahimi outputFilename, clDeviceInfoFilename);
449*6467f958SSadaf Ebrahimi
450*6467f958SSadaf Ebrahimi // execute script
451*6467f958SSadaf Ebrahimi log_info("Executing command: %s\n", runString.c_str());
452*6467f958SSadaf Ebrahimi fflush(stdout);
453*6467f958SSadaf Ebrahimi int returnCode = system(runString.c_str());
454*6467f958SSadaf Ebrahimi if (returnCode != 0)
455*6467f958SSadaf Ebrahimi {
456*6467f958SSadaf Ebrahimi log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
457*6467f958SSadaf Ebrahimi return CL_COMPILE_PROGRAM_FAILURE;
458*6467f958SSadaf Ebrahimi }
459*6467f958SSadaf Ebrahimi
460*6467f958SSadaf Ebrahimi return CL_SUCCESS;
461*6467f958SSadaf Ebrahimi }
462*6467f958SSadaf Ebrahimi
get_first_device_id(const cl_context context,cl_device_id & device)463*6467f958SSadaf Ebrahimi static cl_int get_first_device_id(const cl_context context,
464*6467f958SSadaf Ebrahimi cl_device_id &device)
465*6467f958SSadaf Ebrahimi {
466*6467f958SSadaf Ebrahimi cl_uint numDevices = 0;
467*6467f958SSadaf Ebrahimi cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
468*6467f958SSadaf Ebrahimi sizeof(cl_uint), &numDevices, NULL);
469*6467f958SSadaf Ebrahimi test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
470*6467f958SSadaf Ebrahimi
471*6467f958SSadaf Ebrahimi if (numDevices == 0)
472*6467f958SSadaf Ebrahimi {
473*6467f958SSadaf Ebrahimi log_error("ERROR: No CL devices found\n");
474*6467f958SSadaf Ebrahimi return -1;
475*6467f958SSadaf Ebrahimi }
476*6467f958SSadaf Ebrahimi
477*6467f958SSadaf Ebrahimi std::vector<cl_device_id> devices(numDevices, 0);
478*6467f958SSadaf Ebrahimi error =
479*6467f958SSadaf Ebrahimi clGetContextInfo(context, CL_CONTEXT_DEVICES,
480*6467f958SSadaf Ebrahimi numDevices * sizeof(cl_device_id), &devices[0], NULL);
481*6467f958SSadaf Ebrahimi test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
482*6467f958SSadaf Ebrahimi
483*6467f958SSadaf Ebrahimi device = devices[0];
484*6467f958SSadaf Ebrahimi return CL_SUCCESS;
485*6467f958SSadaf Ebrahimi }
486*6467f958SSadaf Ebrahimi
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)487*6467f958SSadaf Ebrahimi static cl_int get_device_address_bits(const cl_device_id device,
488*6467f958SSadaf Ebrahimi cl_uint &device_address_space_size)
489*6467f958SSadaf Ebrahimi {
490*6467f958SSadaf Ebrahimi cl_int error =
491*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
492*6467f958SSadaf Ebrahimi &device_address_space_size, NULL);
493*6467f958SSadaf Ebrahimi test_error(error, "Unable to obtain device address bits");
494*6467f958SSadaf Ebrahimi
495*6467f958SSadaf Ebrahimi if (device_address_space_size != 32 && device_address_space_size != 64)
496*6467f958SSadaf Ebrahimi {
497*6467f958SSadaf Ebrahimi log_error("ERROR: Unexpected number of device address bits: %u\n",
498*6467f958SSadaf Ebrahimi device_address_space_size);
499*6467f958SSadaf Ebrahimi return -1;
500*6467f958SSadaf Ebrahimi }
501*6467f958SSadaf Ebrahimi
502*6467f958SSadaf Ebrahimi return CL_SUCCESS;
503*6467f958SSadaf Ebrahimi }
504*6467f958SSadaf Ebrahimi
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)505*6467f958SSadaf Ebrahimi static int get_offline_compiler_output(
506*6467f958SSadaf Ebrahimi std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
507*6467f958SSadaf Ebrahimi const CompilationMode compilationMode, const std::string &bOptions,
508*6467f958SSadaf Ebrahimi const std::string &kernelPath, const std::string &kernelNamePrefix)
509*6467f958SSadaf Ebrahimi {
510*6467f958SSadaf Ebrahimi std::string sourceFilename =
511*6467f958SSadaf Ebrahimi get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
512*6467f958SSadaf Ebrahimi std::string outputFilename = get_binary_filename_with_path(
513*6467f958SSadaf Ebrahimi compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
514*6467f958SSadaf Ebrahimi
515*6467f958SSadaf Ebrahimi ifs.open(outputFilename.c_str(), std::ios::binary);
516*6467f958SSadaf Ebrahimi if (!ifs.good())
517*6467f958SSadaf Ebrahimi {
518*6467f958SSadaf Ebrahimi std::string file_type =
519*6467f958SSadaf Ebrahimi get_offline_compilation_file_type_str(compilationMode);
520*6467f958SSadaf Ebrahimi if (gCompilationCacheMode == kCacheModeForceRead)
521*6467f958SSadaf Ebrahimi {
522*6467f958SSadaf Ebrahimi log_info("OfflineCompiler: can't open cached %s file: %s\n",
523*6467f958SSadaf Ebrahimi file_type.c_str(), outputFilename.c_str());
524*6467f958SSadaf Ebrahimi return -1;
525*6467f958SSadaf Ebrahimi }
526*6467f958SSadaf Ebrahimi else
527*6467f958SSadaf Ebrahimi {
528*6467f958SSadaf Ebrahimi int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
529*6467f958SSadaf Ebrahimi compilationMode, bOptions,
530*6467f958SSadaf Ebrahimi sourceFilename, outputFilename);
531*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS) return error;
532*6467f958SSadaf Ebrahimi
533*6467f958SSadaf Ebrahimi // open output file for reading
534*6467f958SSadaf Ebrahimi ifs.open(outputFilename.c_str(), std::ios::binary);
535*6467f958SSadaf Ebrahimi if (!ifs.good())
536*6467f958SSadaf Ebrahimi {
537*6467f958SSadaf Ebrahimi log_info("OfflineCompiler: can't read generated %s file: %s\n",
538*6467f958SSadaf Ebrahimi file_type.c_str(), outputFilename.c_str());
539*6467f958SSadaf Ebrahimi return -1;
540*6467f958SSadaf Ebrahimi }
541*6467f958SSadaf Ebrahimi }
542*6467f958SSadaf Ebrahimi }
543*6467f958SSadaf Ebrahimi
544*6467f958SSadaf Ebrahimi if (compilationMode == kSpir_v && !gDisableSPIRVValidation)
545*6467f958SSadaf Ebrahimi {
546*6467f958SSadaf Ebrahimi std::string runString = gSPIRVValidator + " " + outputFilename;
547*6467f958SSadaf Ebrahimi
548*6467f958SSadaf Ebrahimi int returnCode = system(runString.c_str());
549*6467f958SSadaf Ebrahimi if (returnCode == -1)
550*6467f958SSadaf Ebrahimi {
551*6467f958SSadaf Ebrahimi log_error("Error: failed to invoke SPIR-V validator\n");
552*6467f958SSadaf Ebrahimi return CL_COMPILE_PROGRAM_FAILURE;
553*6467f958SSadaf Ebrahimi }
554*6467f958SSadaf Ebrahimi else if (returnCode != 0)
555*6467f958SSadaf Ebrahimi {
556*6467f958SSadaf Ebrahimi log_error(
557*6467f958SSadaf Ebrahimi "Failed to validate SPIR-V file %s: system() returned 0x%x\n",
558*6467f958SSadaf Ebrahimi outputFilename.c_str(), returnCode);
559*6467f958SSadaf Ebrahimi return CL_COMPILE_PROGRAM_FAILURE;
560*6467f958SSadaf Ebrahimi }
561*6467f958SSadaf Ebrahimi }
562*6467f958SSadaf Ebrahimi
563*6467f958SSadaf Ebrahimi return CL_SUCCESS;
564*6467f958SSadaf Ebrahimi }
565*6467f958SSadaf Ebrahimi
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,CompilationMode compilationMode)566*6467f958SSadaf Ebrahimi static int create_single_kernel_helper_create_program_offline(
567*6467f958SSadaf Ebrahimi cl_context context, cl_device_id device, cl_program *outProgram,
568*6467f958SSadaf Ebrahimi unsigned int numKernelLines, const char *const *kernelProgram,
569*6467f958SSadaf Ebrahimi const char *buildOptions, CompilationMode compilationMode)
570*6467f958SSadaf Ebrahimi {
571*6467f958SSadaf Ebrahimi if (kCacheModeDumpCl == gCompilationCacheMode)
572*6467f958SSadaf Ebrahimi {
573*6467f958SSadaf Ebrahimi return -1;
574*6467f958SSadaf Ebrahimi }
575*6467f958SSadaf Ebrahimi
576*6467f958SSadaf Ebrahimi // Get device CL_DEVICE_ADDRESS_BITS
577*6467f958SSadaf Ebrahimi int error;
578*6467f958SSadaf Ebrahimi cl_uint device_address_space_size = 0;
579*6467f958SSadaf Ebrahimi if (device == NULL)
580*6467f958SSadaf Ebrahimi {
581*6467f958SSadaf Ebrahimi error = get_first_device_id(context, device);
582*6467f958SSadaf Ebrahimi test_error(error, "Failed to get device ID for first device");
583*6467f958SSadaf Ebrahimi }
584*6467f958SSadaf Ebrahimi error = get_device_address_bits(device, device_address_space_size);
585*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS) return error;
586*6467f958SSadaf Ebrahimi
587*6467f958SSadaf Ebrahimi // set build options
588*6467f958SSadaf Ebrahimi std::string bOptions;
589*6467f958SSadaf Ebrahimi bOptions += buildOptions ? std::string(buildOptions) : "";
590*6467f958SSadaf Ebrahimi
591*6467f958SSadaf Ebrahimi std::string kernelName =
592*6467f958SSadaf Ebrahimi get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
593*6467f958SSadaf Ebrahimi
594*6467f958SSadaf Ebrahimi
595*6467f958SSadaf Ebrahimi std::ifstream ifs;
596*6467f958SSadaf Ebrahimi error = get_offline_compiler_output(ifs, device, device_address_space_size,
597*6467f958SSadaf Ebrahimi compilationMode, bOptions,
598*6467f958SSadaf Ebrahimi gCompilationCachePath, kernelName);
599*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS) return error;
600*6467f958SSadaf Ebrahimi
601*6467f958SSadaf Ebrahimi ifs.seekg(0, ifs.end);
602*6467f958SSadaf Ebrahimi size_t length = static_cast<size_t>(ifs.tellg());
603*6467f958SSadaf Ebrahimi ifs.seekg(0, ifs.beg);
604*6467f958SSadaf Ebrahimi
605*6467f958SSadaf Ebrahimi // treat modifiedProgram as input for clCreateProgramWithBinary
606*6467f958SSadaf Ebrahimi if (compilationMode == kBinary)
607*6467f958SSadaf Ebrahimi {
608*6467f958SSadaf Ebrahimi // read binary from file:
609*6467f958SSadaf Ebrahimi std::vector<unsigned char> modifiedKernelBuf(length);
610*6467f958SSadaf Ebrahimi
611*6467f958SSadaf Ebrahimi ifs.read((char *)&modifiedKernelBuf[0], length);
612*6467f958SSadaf Ebrahimi ifs.close();
613*6467f958SSadaf Ebrahimi
614*6467f958SSadaf Ebrahimi size_t lengths = modifiedKernelBuf.size();
615*6467f958SSadaf Ebrahimi const unsigned char *binaries = { &modifiedKernelBuf[0] };
616*6467f958SSadaf Ebrahimi log_info("offlineCompiler: clCreateProgramWithSource replaced with "
617*6467f958SSadaf Ebrahimi "clCreateProgramWithBinary\n");
618*6467f958SSadaf Ebrahimi *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
619*6467f958SSadaf Ebrahimi &binaries, NULL, &error);
620*6467f958SSadaf Ebrahimi if (*outProgram == NULL || error != CL_SUCCESS)
621*6467f958SSadaf Ebrahimi {
622*6467f958SSadaf Ebrahimi print_error(error, "clCreateProgramWithBinary failed");
623*6467f958SSadaf Ebrahimi return error;
624*6467f958SSadaf Ebrahimi }
625*6467f958SSadaf Ebrahimi }
626*6467f958SSadaf Ebrahimi // treat modifiedProgram as input for clCreateProgramWithIL
627*6467f958SSadaf Ebrahimi else if (compilationMode == kSpir_v)
628*6467f958SSadaf Ebrahimi {
629*6467f958SSadaf Ebrahimi // read spir-v from file:
630*6467f958SSadaf Ebrahimi std::vector<unsigned char> modifiedKernelBuf(length);
631*6467f958SSadaf Ebrahimi
632*6467f958SSadaf Ebrahimi ifs.read((char *)&modifiedKernelBuf[0], length);
633*6467f958SSadaf Ebrahimi ifs.close();
634*6467f958SSadaf Ebrahimi
635*6467f958SSadaf Ebrahimi size_t length = modifiedKernelBuf.size();
636*6467f958SSadaf Ebrahimi log_info("offlineCompiler: clCreateProgramWithSource replaced with "
637*6467f958SSadaf Ebrahimi "clCreateProgramWithIL\n");
638*6467f958SSadaf Ebrahimi if (gCoreILProgram)
639*6467f958SSadaf Ebrahimi {
640*6467f958SSadaf Ebrahimi *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
641*6467f958SSadaf Ebrahimi length, &error);
642*6467f958SSadaf Ebrahimi }
643*6467f958SSadaf Ebrahimi else
644*6467f958SSadaf Ebrahimi {
645*6467f958SSadaf Ebrahimi cl_platform_id platform;
646*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
647*6467f958SSadaf Ebrahimi sizeof(cl_platform_id), &platform, NULL);
648*6467f958SSadaf Ebrahimi test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
649*6467f958SSadaf Ebrahimi
650*6467f958SSadaf Ebrahimi clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
651*6467f958SSadaf Ebrahimi clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
652*6467f958SSadaf Ebrahimi clGetExtensionFunctionAddressForPlatform(
653*6467f958SSadaf Ebrahimi platform, "clCreateProgramWithILKHR");
654*6467f958SSadaf Ebrahimi if (clCreateProgramWithILKHR == NULL)
655*6467f958SSadaf Ebrahimi {
656*6467f958SSadaf Ebrahimi log_error(
657*6467f958SSadaf Ebrahimi "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
658*6467f958SSadaf Ebrahimi return -1;
659*6467f958SSadaf Ebrahimi }
660*6467f958SSadaf Ebrahimi *outProgram = clCreateProgramWithILKHR(
661*6467f958SSadaf Ebrahimi context, &modifiedKernelBuf[0], length, &error);
662*6467f958SSadaf Ebrahimi }
663*6467f958SSadaf Ebrahimi
664*6467f958SSadaf Ebrahimi if (*outProgram == NULL || error != CL_SUCCESS)
665*6467f958SSadaf Ebrahimi {
666*6467f958SSadaf Ebrahimi if (gCoreILProgram)
667*6467f958SSadaf Ebrahimi {
668*6467f958SSadaf Ebrahimi print_error(error, "clCreateProgramWithIL failed");
669*6467f958SSadaf Ebrahimi }
670*6467f958SSadaf Ebrahimi else
671*6467f958SSadaf Ebrahimi {
672*6467f958SSadaf Ebrahimi print_error(error, "clCreateProgramWithILKHR failed");
673*6467f958SSadaf Ebrahimi }
674*6467f958SSadaf Ebrahimi return error;
675*6467f958SSadaf Ebrahimi }
676*6467f958SSadaf Ebrahimi }
677*6467f958SSadaf Ebrahimi
678*6467f958SSadaf Ebrahimi return CL_SUCCESS;
679*6467f958SSadaf Ebrahimi }
680*6467f958SSadaf Ebrahimi
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,CompilationMode compilationMode)681*6467f958SSadaf Ebrahimi static int create_single_kernel_helper_create_program(
682*6467f958SSadaf Ebrahimi cl_context context, cl_device_id device, cl_program *outProgram,
683*6467f958SSadaf Ebrahimi unsigned int numKernelLines, const char **kernelProgram,
684*6467f958SSadaf Ebrahimi const char *buildOptions, CompilationMode compilationMode)
685*6467f958SSadaf Ebrahimi {
686*6467f958SSadaf Ebrahimi std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
687*6467f958SSadaf Ebrahimi
688*6467f958SSadaf Ebrahimi std::string filePrefix =
689*6467f958SSadaf Ebrahimi get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
690*6467f958SSadaf Ebrahimi bool shouldSaveToDisk = should_save_kernel_source_to_disk(
691*6467f958SSadaf Ebrahimi compilationMode, gCompilationCacheMode, gCompilationCachePath,
692*6467f958SSadaf Ebrahimi filePrefix);
693*6467f958SSadaf Ebrahimi
694*6467f958SSadaf Ebrahimi if (shouldSaveToDisk)
695*6467f958SSadaf Ebrahimi {
696*6467f958SSadaf Ebrahimi if (CL_SUCCESS
697*6467f958SSadaf Ebrahimi != save_kernel_source_and_options_to_disk(
698*6467f958SSadaf Ebrahimi numKernelLines, kernelProgram, buildOptions))
699*6467f958SSadaf Ebrahimi {
700*6467f958SSadaf Ebrahimi log_error("Unable to dump kernel source to disk");
701*6467f958SSadaf Ebrahimi return -1;
702*6467f958SSadaf Ebrahimi }
703*6467f958SSadaf Ebrahimi }
704*6467f958SSadaf Ebrahimi if (compilationMode == kOnline)
705*6467f958SSadaf Ebrahimi {
706*6467f958SSadaf Ebrahimi int error = CL_SUCCESS;
707*6467f958SSadaf Ebrahimi
708*6467f958SSadaf Ebrahimi /* Create the program object from source */
709*6467f958SSadaf Ebrahimi *outProgram = clCreateProgramWithSource(context, numKernelLines,
710*6467f958SSadaf Ebrahimi kernelProgram, NULL, &error);
711*6467f958SSadaf Ebrahimi if (*outProgram == NULL || error != CL_SUCCESS)
712*6467f958SSadaf Ebrahimi {
713*6467f958SSadaf Ebrahimi print_error(error, "clCreateProgramWithSource failed");
714*6467f958SSadaf Ebrahimi return error;
715*6467f958SSadaf Ebrahimi }
716*6467f958SSadaf Ebrahimi return CL_SUCCESS;
717*6467f958SSadaf Ebrahimi }
718*6467f958SSadaf Ebrahimi else
719*6467f958SSadaf Ebrahimi {
720*6467f958SSadaf Ebrahimi return create_single_kernel_helper_create_program_offline(
721*6467f958SSadaf Ebrahimi context, device, outProgram, numKernelLines, kernelProgram,
722*6467f958SSadaf Ebrahimi buildOptions, compilationMode);
723*6467f958SSadaf Ebrahimi }
724*6467f958SSadaf Ebrahimi }
725*6467f958SSadaf Ebrahimi
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)726*6467f958SSadaf Ebrahimi int create_single_kernel_helper_create_program(cl_context context,
727*6467f958SSadaf Ebrahimi cl_program *outProgram,
728*6467f958SSadaf Ebrahimi unsigned int numKernelLines,
729*6467f958SSadaf Ebrahimi const char **kernelProgram,
730*6467f958SSadaf Ebrahimi const char *buildOptions)
731*6467f958SSadaf Ebrahimi {
732*6467f958SSadaf Ebrahimi return create_single_kernel_helper_create_program(
733*6467f958SSadaf Ebrahimi context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
734*6467f958SSadaf Ebrahimi gCompilationMode);
735*6467f958SSadaf Ebrahimi }
736*6467f958SSadaf Ebrahimi
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)737*6467f958SSadaf Ebrahimi int create_single_kernel_helper_create_program_for_device(
738*6467f958SSadaf Ebrahimi cl_context context, cl_device_id device, cl_program *outProgram,
739*6467f958SSadaf Ebrahimi unsigned int numKernelLines, const char **kernelProgram,
740*6467f958SSadaf Ebrahimi const char *buildOptions)
741*6467f958SSadaf Ebrahimi {
742*6467f958SSadaf Ebrahimi return create_single_kernel_helper_create_program(
743*6467f958SSadaf Ebrahimi context, device, outProgram, numKernelLines, kernelProgram,
744*6467f958SSadaf Ebrahimi buildOptions, gCompilationMode);
745*6467f958SSadaf Ebrahimi }
746*6467f958SSadaf Ebrahimi
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)747*6467f958SSadaf Ebrahimi int create_single_kernel_helper_with_build_options(
748*6467f958SSadaf Ebrahimi cl_context context, cl_program *outProgram, cl_kernel *outKernel,
749*6467f958SSadaf Ebrahimi unsigned int numKernelLines, const char **kernelProgram,
750*6467f958SSadaf Ebrahimi const char *kernelName, const char *buildOptions)
751*6467f958SSadaf Ebrahimi {
752*6467f958SSadaf Ebrahimi return create_single_kernel_helper(context, outProgram, outKernel,
753*6467f958SSadaf Ebrahimi numKernelLines, kernelProgram,
754*6467f958SSadaf Ebrahimi kernelName, buildOptions);
755*6467f958SSadaf Ebrahimi }
756*6467f958SSadaf Ebrahimi
757*6467f958SSadaf Ebrahimi // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)758*6467f958SSadaf Ebrahimi int create_single_kernel_helper(cl_context context, cl_program *outProgram,
759*6467f958SSadaf Ebrahimi cl_kernel *outKernel,
760*6467f958SSadaf Ebrahimi unsigned int numKernelLines,
761*6467f958SSadaf Ebrahimi const char **kernelProgram,
762*6467f958SSadaf Ebrahimi const char *kernelName,
763*6467f958SSadaf Ebrahimi const char *buildOptions)
764*6467f958SSadaf Ebrahimi {
765*6467f958SSadaf Ebrahimi // For the logic that automatically adds -cl-std it is much cleaner if the
766*6467f958SSadaf Ebrahimi // build options have RAII. This buffer will store the potentially updated
767*6467f958SSadaf Ebrahimi // build options, in which case buildOptions will point at the string owned
768*6467f958SSadaf Ebrahimi // by this buffer.
769*6467f958SSadaf Ebrahimi std::string build_options_internal{ buildOptions ? buildOptions : "" };
770*6467f958SSadaf Ebrahimi
771*6467f958SSadaf Ebrahimi // Check the build options for the -cl-std option.
772*6467f958SSadaf Ebrahimi if (!buildOptions || !strstr(buildOptions, "-cl-std"))
773*6467f958SSadaf Ebrahimi {
774*6467f958SSadaf Ebrahimi // If the build option isn't present add it using the latest OpenCL-C
775*6467f958SSadaf Ebrahimi // version supported by the device. This allows calling code to force a
776*6467f958SSadaf Ebrahimi // particular CL C version if it is required, but also means that
777*6467f958SSadaf Ebrahimi // callers need not specify a version if they want to assume the most
778*6467f958SSadaf Ebrahimi // recent CL C.
779*6467f958SSadaf Ebrahimi
780*6467f958SSadaf Ebrahimi auto version = get_max_OpenCL_C_for_context(context);
781*6467f958SSadaf Ebrahimi
782*6467f958SSadaf Ebrahimi std::string cl_std{};
783*6467f958SSadaf Ebrahimi if (version >= Version(3, 0))
784*6467f958SSadaf Ebrahimi {
785*6467f958SSadaf Ebrahimi cl_std = "-cl-std=CL3.0";
786*6467f958SSadaf Ebrahimi }
787*6467f958SSadaf Ebrahimi else if (version >= Version(2, 0) && version < Version(3, 0))
788*6467f958SSadaf Ebrahimi {
789*6467f958SSadaf Ebrahimi cl_std = "-cl-std=CL2.0";
790*6467f958SSadaf Ebrahimi }
791*6467f958SSadaf Ebrahimi else
792*6467f958SSadaf Ebrahimi {
793*6467f958SSadaf Ebrahimi // If the -cl-std build option is not specified, the highest OpenCL
794*6467f958SSadaf Ebrahimi // C 1.x language version supported by each device is used when
795*6467f958SSadaf Ebrahimi // compiling the program for each device.
796*6467f958SSadaf Ebrahimi cl_std = "";
797*6467f958SSadaf Ebrahimi }
798*6467f958SSadaf Ebrahimi build_options_internal += ' ';
799*6467f958SSadaf Ebrahimi build_options_internal += cl_std;
800*6467f958SSadaf Ebrahimi buildOptions = build_options_internal.c_str();
801*6467f958SSadaf Ebrahimi }
802*6467f958SSadaf Ebrahimi int error = create_single_kernel_helper_create_program(
803*6467f958SSadaf Ebrahimi context, outProgram, numKernelLines, kernelProgram, buildOptions);
804*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
805*6467f958SSadaf Ebrahimi {
806*6467f958SSadaf Ebrahimi log_error("Create program failed: %d, line: %d\n", error, __LINE__);
807*6467f958SSadaf Ebrahimi return error;
808*6467f958SSadaf Ebrahimi }
809*6467f958SSadaf Ebrahimi
810*6467f958SSadaf Ebrahimi // Remove offline-compiler-only build options
811*6467f958SSadaf Ebrahimi std::string newBuildOptions;
812*6467f958SSadaf Ebrahimi if (buildOptions != NULL)
813*6467f958SSadaf Ebrahimi {
814*6467f958SSadaf Ebrahimi newBuildOptions = buildOptions;
815*6467f958SSadaf Ebrahimi std::string offlineCompierOptions[] = {
816*6467f958SSadaf Ebrahimi "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
817*6467f958SSadaf Ebrahimi };
818*6467f958SSadaf Ebrahimi for (auto &s : offlineCompierOptions)
819*6467f958SSadaf Ebrahimi {
820*6467f958SSadaf Ebrahimi std::string::size_type i = newBuildOptions.find(s);
821*6467f958SSadaf Ebrahimi if (i != std::string::npos) newBuildOptions.erase(i, s.length());
822*6467f958SSadaf Ebrahimi }
823*6467f958SSadaf Ebrahimi }
824*6467f958SSadaf Ebrahimi // Build program and create kernel
825*6467f958SSadaf Ebrahimi return build_program_create_kernel_helper(
826*6467f958SSadaf Ebrahimi context, outProgram, outKernel, numKernelLines, kernelProgram,
827*6467f958SSadaf Ebrahimi kernelName, newBuildOptions.c_str());
828*6467f958SSadaf Ebrahimi }
829*6467f958SSadaf Ebrahimi
830*6467f958SSadaf Ebrahimi // Builds OpenCL C/C++ program and creates
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)831*6467f958SSadaf Ebrahimi int build_program_create_kernel_helper(
832*6467f958SSadaf Ebrahimi cl_context context, cl_program *outProgram, cl_kernel *outKernel,
833*6467f958SSadaf Ebrahimi unsigned int numKernelLines, const char **kernelProgram,
834*6467f958SSadaf Ebrahimi const char *kernelName, const char *buildOptions)
835*6467f958SSadaf Ebrahimi {
836*6467f958SSadaf Ebrahimi int error;
837*6467f958SSadaf Ebrahimi /* Compile the program */
838*6467f958SSadaf Ebrahimi int buildProgramFailed = 0;
839*6467f958SSadaf Ebrahimi int printedSource = 0;
840*6467f958SSadaf Ebrahimi error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
841*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
842*6467f958SSadaf Ebrahimi {
843*6467f958SSadaf Ebrahimi unsigned int i;
844*6467f958SSadaf Ebrahimi print_error(error, "clBuildProgram failed");
845*6467f958SSadaf Ebrahimi buildProgramFailed = 1;
846*6467f958SSadaf Ebrahimi printedSource = 1;
847*6467f958SSadaf Ebrahimi log_error("Build options: %s\n", buildOptions);
848*6467f958SSadaf Ebrahimi log_error("Original source is: ------------\n");
849*6467f958SSadaf Ebrahimi for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
850*6467f958SSadaf Ebrahimi }
851*6467f958SSadaf Ebrahimi
852*6467f958SSadaf Ebrahimi // Verify the build status on all devices
853*6467f958SSadaf Ebrahimi cl_uint deviceCount = 0;
854*6467f958SSadaf Ebrahimi error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
855*6467f958SSadaf Ebrahimi sizeof(deviceCount), &deviceCount, NULL);
856*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
857*6467f958SSadaf Ebrahimi {
858*6467f958SSadaf Ebrahimi print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
859*6467f958SSadaf Ebrahimi return error;
860*6467f958SSadaf Ebrahimi }
861*6467f958SSadaf Ebrahimi
862*6467f958SSadaf Ebrahimi if (deviceCount == 0)
863*6467f958SSadaf Ebrahimi {
864*6467f958SSadaf Ebrahimi log_error("No devices found for program.\n");
865*6467f958SSadaf Ebrahimi return -1;
866*6467f958SSadaf Ebrahimi }
867*6467f958SSadaf Ebrahimi
868*6467f958SSadaf Ebrahimi cl_device_id *devices =
869*6467f958SSadaf Ebrahimi (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
870*6467f958SSadaf Ebrahimi if (NULL == devices) return -1;
871*6467f958SSadaf Ebrahimi BufferOwningPtr<cl_device_id> devicesBuf(devices);
872*6467f958SSadaf Ebrahimi
873*6467f958SSadaf Ebrahimi memset(devices, 0, deviceCount * sizeof(cl_device_id));
874*6467f958SSadaf Ebrahimi error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
875*6467f958SSadaf Ebrahimi sizeof(cl_device_id) * deviceCount, devices, NULL);
876*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
877*6467f958SSadaf Ebrahimi {
878*6467f958SSadaf Ebrahimi print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
879*6467f958SSadaf Ebrahimi return error;
880*6467f958SSadaf Ebrahimi }
881*6467f958SSadaf Ebrahimi
882*6467f958SSadaf Ebrahimi cl_uint z;
883*6467f958SSadaf Ebrahimi bool buildFailed = false;
884*6467f958SSadaf Ebrahimi for (z = 0; z < deviceCount; z++)
885*6467f958SSadaf Ebrahimi {
886*6467f958SSadaf Ebrahimi char deviceName[4096] = "";
887*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
888*6467f958SSadaf Ebrahimi deviceName, NULL);
889*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS || deviceName[0] == '\0')
890*6467f958SSadaf Ebrahimi {
891*6467f958SSadaf Ebrahimi log_error("Device \"%d\" failed to return a name\n", z);
892*6467f958SSadaf Ebrahimi print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
893*6467f958SSadaf Ebrahimi }
894*6467f958SSadaf Ebrahimi
895*6467f958SSadaf Ebrahimi cl_build_status buildStatus;
896*6467f958SSadaf Ebrahimi error = clGetProgramBuildInfo(*outProgram, devices[z],
897*6467f958SSadaf Ebrahimi CL_PROGRAM_BUILD_STATUS,
898*6467f958SSadaf Ebrahimi sizeof(buildStatus), &buildStatus, NULL);
899*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
900*6467f958SSadaf Ebrahimi {
901*6467f958SSadaf Ebrahimi print_error(error,
902*6467f958SSadaf Ebrahimi "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
903*6467f958SSadaf Ebrahimi return error;
904*6467f958SSadaf Ebrahimi }
905*6467f958SSadaf Ebrahimi
906*6467f958SSadaf Ebrahimi if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
907*6467f958SSadaf Ebrahimi && deviceCount == 1)
908*6467f958SSadaf Ebrahimi {
909*6467f958SSadaf Ebrahimi buildFailed = true;
910*6467f958SSadaf Ebrahimi log_error("clBuildProgram returned an error, but buildStatus is "
911*6467f958SSadaf Ebrahimi "marked as CL_BUILD_SUCCESS.\n");
912*6467f958SSadaf Ebrahimi }
913*6467f958SSadaf Ebrahimi
914*6467f958SSadaf Ebrahimi if (buildStatus != CL_BUILD_SUCCESS)
915*6467f958SSadaf Ebrahimi {
916*6467f958SSadaf Ebrahimi
917*6467f958SSadaf Ebrahimi char statusString[64] = "";
918*6467f958SSadaf Ebrahimi if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
919*6467f958SSadaf Ebrahimi sprintf(statusString, "CL_BUILD_SUCCESS");
920*6467f958SSadaf Ebrahimi else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
921*6467f958SSadaf Ebrahimi sprintf(statusString, "CL_BUILD_NONE");
922*6467f958SSadaf Ebrahimi else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
923*6467f958SSadaf Ebrahimi sprintf(statusString, "CL_BUILD_ERROR");
924*6467f958SSadaf Ebrahimi else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
925*6467f958SSadaf Ebrahimi sprintf(statusString, "CL_BUILD_IN_PROGRESS");
926*6467f958SSadaf Ebrahimi else
927*6467f958SSadaf Ebrahimi sprintf(statusString, "UNKNOWN (%d)", buildStatus);
928*6467f958SSadaf Ebrahimi
929*6467f958SSadaf Ebrahimi if (buildStatus != CL_BUILD_SUCCESS)
930*6467f958SSadaf Ebrahimi log_error(
931*6467f958SSadaf Ebrahimi "Build not successful for device \"%s\", status: %s\n",
932*6467f958SSadaf Ebrahimi deviceName, statusString);
933*6467f958SSadaf Ebrahimi size_t paramSize = 0;
934*6467f958SSadaf Ebrahimi error = clGetProgramBuildInfo(*outProgram, devices[z],
935*6467f958SSadaf Ebrahimi CL_PROGRAM_BUILD_LOG, 0, NULL,
936*6467f958SSadaf Ebrahimi ¶mSize);
937*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
938*6467f958SSadaf Ebrahimi {
939*6467f958SSadaf Ebrahimi
940*6467f958SSadaf Ebrahimi print_error(
941*6467f958SSadaf Ebrahimi error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
942*6467f958SSadaf Ebrahimi return error;
943*6467f958SSadaf Ebrahimi }
944*6467f958SSadaf Ebrahimi
945*6467f958SSadaf Ebrahimi std::string log;
946*6467f958SSadaf Ebrahimi log.resize(paramSize / sizeof(char));
947*6467f958SSadaf Ebrahimi error = clGetProgramBuildInfo(*outProgram, devices[z],
948*6467f958SSadaf Ebrahimi CL_PROGRAM_BUILD_LOG, paramSize,
949*6467f958SSadaf Ebrahimi &log[0], NULL);
950*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS || log[0] == '\0')
951*6467f958SSadaf Ebrahimi {
952*6467f958SSadaf Ebrahimi log_error("Device %d (%s) failed to return a build log\n", z,
953*6467f958SSadaf Ebrahimi deviceName);
954*6467f958SSadaf Ebrahimi if (error)
955*6467f958SSadaf Ebrahimi {
956*6467f958SSadaf Ebrahimi print_error(
957*6467f958SSadaf Ebrahimi error,
958*6467f958SSadaf Ebrahimi "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
959*6467f958SSadaf Ebrahimi return error;
960*6467f958SSadaf Ebrahimi }
961*6467f958SSadaf Ebrahimi else
962*6467f958SSadaf Ebrahimi {
963*6467f958SSadaf Ebrahimi log_error("clGetProgramBuildInfo returned an empty log.\n");
964*6467f958SSadaf Ebrahimi return -1;
965*6467f958SSadaf Ebrahimi }
966*6467f958SSadaf Ebrahimi }
967*6467f958SSadaf Ebrahimi // In this case we've already printed out the code above.
968*6467f958SSadaf Ebrahimi if (!printedSource)
969*6467f958SSadaf Ebrahimi {
970*6467f958SSadaf Ebrahimi unsigned int i;
971*6467f958SSadaf Ebrahimi log_error("Original source is: ------------\n");
972*6467f958SSadaf Ebrahimi for (i = 0; i < numKernelLines; i++)
973*6467f958SSadaf Ebrahimi log_error("%s", kernelProgram[i]);
974*6467f958SSadaf Ebrahimi printedSource = 1;
975*6467f958SSadaf Ebrahimi }
976*6467f958SSadaf Ebrahimi log_error("Build log for device \"%s\" is: ------------\n",
977*6467f958SSadaf Ebrahimi deviceName);
978*6467f958SSadaf Ebrahimi log_error("%s\n", log.c_str());
979*6467f958SSadaf Ebrahimi log_error("\n----------\n");
980*6467f958SSadaf Ebrahimi return -1;
981*6467f958SSadaf Ebrahimi }
982*6467f958SSadaf Ebrahimi }
983*6467f958SSadaf Ebrahimi
984*6467f958SSadaf Ebrahimi if (buildFailed)
985*6467f958SSadaf Ebrahimi {
986*6467f958SSadaf Ebrahimi return -1;
987*6467f958SSadaf Ebrahimi }
988*6467f958SSadaf Ebrahimi
989*6467f958SSadaf Ebrahimi /* And create a kernel from it */
990*6467f958SSadaf Ebrahimi if (kernelName != NULL)
991*6467f958SSadaf Ebrahimi {
992*6467f958SSadaf Ebrahimi *outKernel = clCreateKernel(*outProgram, kernelName, &error);
993*6467f958SSadaf Ebrahimi if (*outKernel == NULL || error != CL_SUCCESS)
994*6467f958SSadaf Ebrahimi {
995*6467f958SSadaf Ebrahimi print_error(error, "Unable to create kernel");
996*6467f958SSadaf Ebrahimi return error;
997*6467f958SSadaf Ebrahimi }
998*6467f958SSadaf Ebrahimi }
999*6467f958SSadaf Ebrahimi
1000*6467f958SSadaf Ebrahimi return 0;
1001*6467f958SSadaf Ebrahimi }
1002*6467f958SSadaf Ebrahimi
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)1003*6467f958SSadaf Ebrahimi int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
1004*6467f958SSadaf Ebrahimi size_t *outMaxSize, size_t *outLimits)
1005*6467f958SSadaf Ebrahimi {
1006*6467f958SSadaf Ebrahimi cl_device_id *devices;
1007*6467f958SSadaf Ebrahimi size_t size, maxCommonSize = 0;
1008*6467f958SSadaf Ebrahimi int numDevices, i, j, error;
1009*6467f958SSadaf Ebrahimi cl_uint numDims;
1010*6467f958SSadaf Ebrahimi size_t outSize;
1011*6467f958SSadaf Ebrahimi size_t sizeLimit[] = { 1, 1, 1 };
1012*6467f958SSadaf Ebrahimi
1013*6467f958SSadaf Ebrahimi
1014*6467f958SSadaf Ebrahimi /* Assume fewer than 16 devices will be returned */
1015*6467f958SSadaf Ebrahimi error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
1016*6467f958SSadaf Ebrahimi test_error(error, "Unable to obtain list of devices size for context");
1017*6467f958SSadaf Ebrahimi devices = (cl_device_id *)malloc(outSize);
1018*6467f958SSadaf Ebrahimi BufferOwningPtr<cl_device_id> devicesBuf(devices);
1019*6467f958SSadaf Ebrahimi
1020*6467f958SSadaf Ebrahimi error =
1021*6467f958SSadaf Ebrahimi clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
1022*6467f958SSadaf Ebrahimi test_error(error, "Unable to obtain list of devices for context");
1023*6467f958SSadaf Ebrahimi
1024*6467f958SSadaf Ebrahimi numDevices = (int)(outSize / sizeof(cl_device_id));
1025*6467f958SSadaf Ebrahimi
1026*6467f958SSadaf Ebrahimi for (i = 0; i < numDevices; i++)
1027*6467f958SSadaf Ebrahimi {
1028*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
1029*6467f958SSadaf Ebrahimi sizeof(size), &size, NULL);
1030*6467f958SSadaf Ebrahimi test_error(error, "Unable to obtain max work group size for device");
1031*6467f958SSadaf Ebrahimi if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1032*6467f958SSadaf Ebrahimi
1033*6467f958SSadaf Ebrahimi error = clGetKernelWorkGroupInfo(kernel, devices[i],
1034*6467f958SSadaf Ebrahimi CL_KERNEL_WORK_GROUP_SIZE,
1035*6467f958SSadaf Ebrahimi sizeof(size), &size, NULL);
1036*6467f958SSadaf Ebrahimi test_error(
1037*6467f958SSadaf Ebrahimi error,
1038*6467f958SSadaf Ebrahimi "Unable to obtain max work group size for device and kernel combo");
1039*6467f958SSadaf Ebrahimi if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1040*6467f958SSadaf Ebrahimi
1041*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1042*6467f958SSadaf Ebrahimi sizeof(numDims), &numDims, NULL);
1043*6467f958SSadaf Ebrahimi test_error(
1044*6467f958SSadaf Ebrahimi error,
1045*6467f958SSadaf Ebrahimi "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1046*6467f958SSadaf Ebrahimi sizeLimit[0] = 1;
1047*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
1048*6467f958SSadaf Ebrahimi numDims * sizeof(size_t), sizeLimit, NULL);
1049*6467f958SSadaf Ebrahimi test_error(error,
1050*6467f958SSadaf Ebrahimi "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1051*6467f958SSadaf Ebrahimi
1052*6467f958SSadaf Ebrahimi if (outLimits != NULL)
1053*6467f958SSadaf Ebrahimi {
1054*6467f958SSadaf Ebrahimi if (i == 0)
1055*6467f958SSadaf Ebrahimi {
1056*6467f958SSadaf Ebrahimi for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
1057*6467f958SSadaf Ebrahimi }
1058*6467f958SSadaf Ebrahimi else
1059*6467f958SSadaf Ebrahimi {
1060*6467f958SSadaf Ebrahimi for (j = 0; j < (int)numDims; j++)
1061*6467f958SSadaf Ebrahimi {
1062*6467f958SSadaf Ebrahimi if (sizeLimit[j] < outLimits[j])
1063*6467f958SSadaf Ebrahimi outLimits[j] = sizeLimit[j];
1064*6467f958SSadaf Ebrahimi }
1065*6467f958SSadaf Ebrahimi }
1066*6467f958SSadaf Ebrahimi }
1067*6467f958SSadaf Ebrahimi }
1068*6467f958SSadaf Ebrahimi
1069*6467f958SSadaf Ebrahimi *outMaxSize = (unsigned int)maxCommonSize;
1070*6467f958SSadaf Ebrahimi return 0;
1071*6467f958SSadaf Ebrahimi }
1072*6467f958SSadaf Ebrahimi
1073*6467f958SSadaf Ebrahimi
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1074*6467f958SSadaf Ebrahimi extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
1075*6467f958SSadaf Ebrahimi cl_kernel kernel,
1076*6467f958SSadaf Ebrahimi size_t *outSize)
1077*6467f958SSadaf Ebrahimi {
1078*6467f958SSadaf Ebrahimi cl_uint maxDim;
1079*6467f958SSadaf Ebrahimi size_t maxWgSize;
1080*6467f958SSadaf Ebrahimi size_t *maxWgSizePerDim;
1081*6467f958SSadaf Ebrahimi int error;
1082*6467f958SSadaf Ebrahimi
1083*6467f958SSadaf Ebrahimi error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
1084*6467f958SSadaf Ebrahimi sizeof(size_t), &maxWgSize, NULL);
1085*6467f958SSadaf Ebrahimi test_error(error,
1086*6467f958SSadaf Ebrahimi "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
1087*6467f958SSadaf Ebrahimi
1088*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1089*6467f958SSadaf Ebrahimi sizeof(cl_uint), &maxDim, NULL);
1090*6467f958SSadaf Ebrahimi test_error(error,
1091*6467f958SSadaf Ebrahimi "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
1092*6467f958SSadaf Ebrahimi maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
1093*6467f958SSadaf Ebrahimi if (!maxWgSizePerDim)
1094*6467f958SSadaf Ebrahimi {
1095*6467f958SSadaf Ebrahimi log_error("Unable to allocate maxWgSizePerDim\n");
1096*6467f958SSadaf Ebrahimi return -1;
1097*6467f958SSadaf Ebrahimi }
1098*6467f958SSadaf Ebrahimi
1099*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1100*6467f958SSadaf Ebrahimi maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
1101*6467f958SSadaf Ebrahimi if (error != CL_SUCCESS)
1102*6467f958SSadaf Ebrahimi {
1103*6467f958SSadaf Ebrahimi log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
1104*6467f958SSadaf Ebrahimi free(maxWgSizePerDim);
1105*6467f958SSadaf Ebrahimi return error;
1106*6467f958SSadaf Ebrahimi }
1107*6467f958SSadaf Ebrahimi
1108*6467f958SSadaf Ebrahimi // "maxWgSize" is limited to that of the first dimension.
1109*6467f958SSadaf Ebrahimi if (maxWgSize > maxWgSizePerDim[0])
1110*6467f958SSadaf Ebrahimi {
1111*6467f958SSadaf Ebrahimi maxWgSize = maxWgSizePerDim[0];
1112*6467f958SSadaf Ebrahimi }
1113*6467f958SSadaf Ebrahimi
1114*6467f958SSadaf Ebrahimi free(maxWgSizePerDim);
1115*6467f958SSadaf Ebrahimi
1116*6467f958SSadaf Ebrahimi *outSize = maxWgSize;
1117*6467f958SSadaf Ebrahimi return 0;
1118*6467f958SSadaf Ebrahimi }
1119*6467f958SSadaf Ebrahimi
1120*6467f958SSadaf Ebrahimi
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1121*6467f958SSadaf Ebrahimi int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
1122*6467f958SSadaf Ebrahimi size_t globalThreadSize, size_t *outMaxSize)
1123*6467f958SSadaf Ebrahimi {
1124*6467f958SSadaf Ebrahimi size_t sizeLimit[3];
1125*6467f958SSadaf Ebrahimi int error =
1126*6467f958SSadaf Ebrahimi get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
1127*6467f958SSadaf Ebrahimi if (error != 0) return error;
1128*6467f958SSadaf Ebrahimi
1129*6467f958SSadaf Ebrahimi /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
1130*6467f958SSadaf Ebrahimi */
1131*6467f958SSadaf Ebrahimi /* Note for speed, we don't need to check the range of maxCommonSize, b/c
1132*6467f958SSadaf Ebrahimi once it gets to 1, the modulo test will succeed and break the loop anyway
1133*6467f958SSadaf Ebrahimi */
1134*6467f958SSadaf Ebrahimi for (;
1135*6467f958SSadaf Ebrahimi (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
1136*6467f958SSadaf Ebrahimi (*outMaxSize)--)
1137*6467f958SSadaf Ebrahimi ;
1138*6467f958SSadaf Ebrahimi return 0;
1139*6467f958SSadaf Ebrahimi }
1140*6467f958SSadaf Ebrahimi
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1141*6467f958SSadaf Ebrahimi int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
1142*6467f958SSadaf Ebrahimi size_t *globalThreadSizes,
1143*6467f958SSadaf Ebrahimi size_t *outMaxSizes)
1144*6467f958SSadaf Ebrahimi {
1145*6467f958SSadaf Ebrahimi size_t sizeLimit[3];
1146*6467f958SSadaf Ebrahimi size_t maxSize;
1147*6467f958SSadaf Ebrahimi int error =
1148*6467f958SSadaf Ebrahimi get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1149*6467f958SSadaf Ebrahimi if (error != 0) return error;
1150*6467f958SSadaf Ebrahimi
1151*6467f958SSadaf Ebrahimi /* Now find a set of factors, multiplied together less than maxSize, but
1152*6467f958SSadaf Ebrahimi each a factor of the global sizes */
1153*6467f958SSadaf Ebrahimi
1154*6467f958SSadaf Ebrahimi /* Simple case */
1155*6467f958SSadaf Ebrahimi if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
1156*6467f958SSadaf Ebrahimi {
1157*6467f958SSadaf Ebrahimi if (globalThreadSizes[0] <= sizeLimit[0]
1158*6467f958SSadaf Ebrahimi && globalThreadSizes[1] <= sizeLimit[1])
1159*6467f958SSadaf Ebrahimi {
1160*6467f958SSadaf Ebrahimi outMaxSizes[0] = globalThreadSizes[0];
1161*6467f958SSadaf Ebrahimi outMaxSizes[1] = globalThreadSizes[1];
1162*6467f958SSadaf Ebrahimi return 0;
1163*6467f958SSadaf Ebrahimi }
1164*6467f958SSadaf Ebrahimi }
1165*6467f958SSadaf Ebrahimi
1166*6467f958SSadaf Ebrahimi size_t remainingSize, sizeForThisOne;
1167*6467f958SSadaf Ebrahimi remainingSize = maxSize;
1168*6467f958SSadaf Ebrahimi int i, j;
1169*6467f958SSadaf Ebrahimi for (i = 0; i < 2; i++)
1170*6467f958SSadaf Ebrahimi {
1171*6467f958SSadaf Ebrahimi if (globalThreadSizes[i] > remainingSize)
1172*6467f958SSadaf Ebrahimi sizeForThisOne = remainingSize;
1173*6467f958SSadaf Ebrahimi else
1174*6467f958SSadaf Ebrahimi sizeForThisOne = globalThreadSizes[i];
1175*6467f958SSadaf Ebrahimi for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1176*6467f958SSadaf Ebrahimi || (sizeForThisOne > sizeLimit[i]);
1177*6467f958SSadaf Ebrahimi sizeForThisOne--)
1178*6467f958SSadaf Ebrahimi ;
1179*6467f958SSadaf Ebrahimi outMaxSizes[i] = sizeForThisOne;
1180*6467f958SSadaf Ebrahimi remainingSize = maxSize;
1181*6467f958SSadaf Ebrahimi for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1182*6467f958SSadaf Ebrahimi }
1183*6467f958SSadaf Ebrahimi
1184*6467f958SSadaf Ebrahimi return 0;
1185*6467f958SSadaf Ebrahimi }
1186*6467f958SSadaf Ebrahimi
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1187*6467f958SSadaf Ebrahimi int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
1188*6467f958SSadaf Ebrahimi size_t *globalThreadSizes,
1189*6467f958SSadaf Ebrahimi size_t *outMaxSizes)
1190*6467f958SSadaf Ebrahimi {
1191*6467f958SSadaf Ebrahimi size_t sizeLimit[3];
1192*6467f958SSadaf Ebrahimi size_t maxSize;
1193*6467f958SSadaf Ebrahimi int error =
1194*6467f958SSadaf Ebrahimi get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1195*6467f958SSadaf Ebrahimi if (error != 0) return error;
1196*6467f958SSadaf Ebrahimi /* Now find a set of factors, multiplied together less than maxSize, but
1197*6467f958SSadaf Ebrahimi each a factor of the global sizes */
1198*6467f958SSadaf Ebrahimi
1199*6467f958SSadaf Ebrahimi /* Simple case */
1200*6467f958SSadaf Ebrahimi if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
1201*6467f958SSadaf Ebrahimi <= maxSize)
1202*6467f958SSadaf Ebrahimi {
1203*6467f958SSadaf Ebrahimi if (globalThreadSizes[0] <= sizeLimit[0]
1204*6467f958SSadaf Ebrahimi && globalThreadSizes[1] <= sizeLimit[1]
1205*6467f958SSadaf Ebrahimi && globalThreadSizes[2] <= sizeLimit[2])
1206*6467f958SSadaf Ebrahimi {
1207*6467f958SSadaf Ebrahimi outMaxSizes[0] = globalThreadSizes[0];
1208*6467f958SSadaf Ebrahimi outMaxSizes[1] = globalThreadSizes[1];
1209*6467f958SSadaf Ebrahimi outMaxSizes[2] = globalThreadSizes[2];
1210*6467f958SSadaf Ebrahimi return 0;
1211*6467f958SSadaf Ebrahimi }
1212*6467f958SSadaf Ebrahimi }
1213*6467f958SSadaf Ebrahimi
1214*6467f958SSadaf Ebrahimi size_t remainingSize, sizeForThisOne;
1215*6467f958SSadaf Ebrahimi remainingSize = maxSize;
1216*6467f958SSadaf Ebrahimi int i, j;
1217*6467f958SSadaf Ebrahimi for (i = 0; i < 3; i++)
1218*6467f958SSadaf Ebrahimi {
1219*6467f958SSadaf Ebrahimi if (globalThreadSizes[i] > remainingSize)
1220*6467f958SSadaf Ebrahimi sizeForThisOne = remainingSize;
1221*6467f958SSadaf Ebrahimi else
1222*6467f958SSadaf Ebrahimi sizeForThisOne = globalThreadSizes[i];
1223*6467f958SSadaf Ebrahimi for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1224*6467f958SSadaf Ebrahimi || (sizeForThisOne > sizeLimit[i]);
1225*6467f958SSadaf Ebrahimi sizeForThisOne--)
1226*6467f958SSadaf Ebrahimi ;
1227*6467f958SSadaf Ebrahimi outMaxSizes[i] = sizeForThisOne;
1228*6467f958SSadaf Ebrahimi remainingSize = maxSize;
1229*6467f958SSadaf Ebrahimi for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1230*6467f958SSadaf Ebrahimi }
1231*6467f958SSadaf Ebrahimi
1232*6467f958SSadaf Ebrahimi return 0;
1233*6467f958SSadaf Ebrahimi }
1234*6467f958SSadaf Ebrahimi
1235*6467f958SSadaf Ebrahimi /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1236*6467f958SSadaf Ebrahimi int is_image_format_supported(cl_context context, cl_mem_flags flags,
1237*6467f958SSadaf Ebrahimi cl_mem_object_type image_type,
1238*6467f958SSadaf Ebrahimi const cl_image_format *fmt)
1239*6467f958SSadaf Ebrahimi {
1240*6467f958SSadaf Ebrahimi cl_image_format *list;
1241*6467f958SSadaf Ebrahimi cl_uint count = 0;
1242*6467f958SSadaf Ebrahimi cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
1243*6467f958SSadaf Ebrahimi NULL, &count);
1244*6467f958SSadaf Ebrahimi if (count == 0) return 0;
1245*6467f958SSadaf Ebrahimi
1246*6467f958SSadaf Ebrahimi list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
1247*6467f958SSadaf Ebrahimi if (NULL == list)
1248*6467f958SSadaf Ebrahimi {
1249*6467f958SSadaf Ebrahimi log_error("Error: unable to allocate %zu byte buffer for image format "
1250*6467f958SSadaf Ebrahimi "list at %s:%d (err = %d)\n",
1251*6467f958SSadaf Ebrahimi count * sizeof(cl_image_format), __FILE__, __LINE__, err);
1252*6467f958SSadaf Ebrahimi return 0;
1253*6467f958SSadaf Ebrahimi }
1254*6467f958SSadaf Ebrahimi BufferOwningPtr<cl_image_format> listBuf(list);
1255*6467f958SSadaf Ebrahimi
1256*6467f958SSadaf Ebrahimi
1257*6467f958SSadaf Ebrahimi cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
1258*6467f958SSadaf Ebrahimi list, NULL);
1259*6467f958SSadaf Ebrahimi if (error)
1260*6467f958SSadaf Ebrahimi {
1261*6467f958SSadaf Ebrahimi log_error("Error: failed to obtain supported image type list at %s:%d "
1262*6467f958SSadaf Ebrahimi "(err = %d)\n",
1263*6467f958SSadaf Ebrahimi __FILE__, __LINE__, err);
1264*6467f958SSadaf Ebrahimi return 0;
1265*6467f958SSadaf Ebrahimi }
1266*6467f958SSadaf Ebrahimi
1267*6467f958SSadaf Ebrahimi // iterate looking for a match.
1268*6467f958SSadaf Ebrahimi cl_uint i;
1269*6467f958SSadaf Ebrahimi for (i = 0; i < count; i++)
1270*6467f958SSadaf Ebrahimi {
1271*6467f958SSadaf Ebrahimi if (fmt->image_channel_data_type == list[i].image_channel_data_type
1272*6467f958SSadaf Ebrahimi && fmt->image_channel_order == list[i].image_channel_order)
1273*6467f958SSadaf Ebrahimi break;
1274*6467f958SSadaf Ebrahimi }
1275*6467f958SSadaf Ebrahimi
1276*6467f958SSadaf Ebrahimi return (i < count) ? 1 : 0;
1277*6467f958SSadaf Ebrahimi }
1278*6467f958SSadaf Ebrahimi
1279*6467f958SSadaf Ebrahimi size_t get_pixel_bytes(const cl_image_format *fmt);
get_pixel_bytes(const cl_image_format * fmt)1280*6467f958SSadaf Ebrahimi size_t get_pixel_bytes(const cl_image_format *fmt)
1281*6467f958SSadaf Ebrahimi {
1282*6467f958SSadaf Ebrahimi size_t chanCount;
1283*6467f958SSadaf Ebrahimi switch (fmt->image_channel_order)
1284*6467f958SSadaf Ebrahimi {
1285*6467f958SSadaf Ebrahimi case CL_R:
1286*6467f958SSadaf Ebrahimi case CL_A:
1287*6467f958SSadaf Ebrahimi case CL_Rx:
1288*6467f958SSadaf Ebrahimi case CL_INTENSITY:
1289*6467f958SSadaf Ebrahimi case CL_LUMINANCE:
1290*6467f958SSadaf Ebrahimi case CL_DEPTH: chanCount = 1; break;
1291*6467f958SSadaf Ebrahimi case CL_RG:
1292*6467f958SSadaf Ebrahimi case CL_RA:
1293*6467f958SSadaf Ebrahimi case CL_RGx: chanCount = 2; break;
1294*6467f958SSadaf Ebrahimi case CL_RGB:
1295*6467f958SSadaf Ebrahimi case CL_RGBx:
1296*6467f958SSadaf Ebrahimi case CL_sRGB:
1297*6467f958SSadaf Ebrahimi case CL_sRGBx: chanCount = 3; break;
1298*6467f958SSadaf Ebrahimi case CL_RGBA:
1299*6467f958SSadaf Ebrahimi case CL_ARGB:
1300*6467f958SSadaf Ebrahimi case CL_BGRA:
1301*6467f958SSadaf Ebrahimi case CL_sBGRA:
1302*6467f958SSadaf Ebrahimi case CL_sRGBA:
1303*6467f958SSadaf Ebrahimi #ifdef CL_1RGB_APPLE
1304*6467f958SSadaf Ebrahimi case CL_1RGB_APPLE:
1305*6467f958SSadaf Ebrahimi #endif
1306*6467f958SSadaf Ebrahimi #ifdef CL_BGR1_APPLE
1307*6467f958SSadaf Ebrahimi case CL_BGR1_APPLE:
1308*6467f958SSadaf Ebrahimi #endif
1309*6467f958SSadaf Ebrahimi chanCount = 4;
1310*6467f958SSadaf Ebrahimi break;
1311*6467f958SSadaf Ebrahimi default:
1312*6467f958SSadaf Ebrahimi log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
1313*6467f958SSadaf Ebrahimi abort();
1314*6467f958SSadaf Ebrahimi break;
1315*6467f958SSadaf Ebrahimi }
1316*6467f958SSadaf Ebrahimi
1317*6467f958SSadaf Ebrahimi switch (fmt->image_channel_data_type)
1318*6467f958SSadaf Ebrahimi {
1319*6467f958SSadaf Ebrahimi case CL_UNORM_SHORT_565:
1320*6467f958SSadaf Ebrahimi case CL_UNORM_SHORT_555: return 2;
1321*6467f958SSadaf Ebrahimi
1322*6467f958SSadaf Ebrahimi case CL_UNORM_INT_101010: return 4;
1323*6467f958SSadaf Ebrahimi
1324*6467f958SSadaf Ebrahimi case CL_SNORM_INT8:
1325*6467f958SSadaf Ebrahimi case CL_UNORM_INT8:
1326*6467f958SSadaf Ebrahimi case CL_SIGNED_INT8:
1327*6467f958SSadaf Ebrahimi case CL_UNSIGNED_INT8: return chanCount;
1328*6467f958SSadaf Ebrahimi
1329*6467f958SSadaf Ebrahimi case CL_SNORM_INT16:
1330*6467f958SSadaf Ebrahimi case CL_UNORM_INT16:
1331*6467f958SSadaf Ebrahimi case CL_HALF_FLOAT:
1332*6467f958SSadaf Ebrahimi case CL_SIGNED_INT16:
1333*6467f958SSadaf Ebrahimi case CL_UNSIGNED_INT16:
1334*6467f958SSadaf Ebrahimi #ifdef CL_SFIXED14_APPLE
1335*6467f958SSadaf Ebrahimi case CL_SFIXED14_APPLE:
1336*6467f958SSadaf Ebrahimi #endif
1337*6467f958SSadaf Ebrahimi return chanCount * 2;
1338*6467f958SSadaf Ebrahimi
1339*6467f958SSadaf Ebrahimi case CL_SIGNED_INT32:
1340*6467f958SSadaf Ebrahimi case CL_UNSIGNED_INT32:
1341*6467f958SSadaf Ebrahimi case CL_FLOAT: return chanCount * 4;
1342*6467f958SSadaf Ebrahimi
1343*6467f958SSadaf Ebrahimi default:
1344*6467f958SSadaf Ebrahimi log_error("Unknown channel data type at %s:%d!\n", __FILE__,
1345*6467f958SSadaf Ebrahimi __LINE__);
1346*6467f958SSadaf Ebrahimi abort();
1347*6467f958SSadaf Ebrahimi }
1348*6467f958SSadaf Ebrahimi
1349*6467f958SSadaf Ebrahimi return 0;
1350*6467f958SSadaf Ebrahimi }
1351*6467f958SSadaf Ebrahimi
verifyImageSupport(cl_device_id device)1352*6467f958SSadaf Ebrahimi test_status verifyImageSupport(cl_device_id device)
1353*6467f958SSadaf Ebrahimi {
1354*6467f958SSadaf Ebrahimi int result = checkForImageSupport(device);
1355*6467f958SSadaf Ebrahimi if (result == 0)
1356*6467f958SSadaf Ebrahimi {
1357*6467f958SSadaf Ebrahimi return TEST_PASS;
1358*6467f958SSadaf Ebrahimi }
1359*6467f958SSadaf Ebrahimi if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
1360*6467f958SSadaf Ebrahimi {
1361*6467f958SSadaf Ebrahimi log_error("SKIPPED: Device does not supported images as required by "
1362*6467f958SSadaf Ebrahimi "this test!\n");
1363*6467f958SSadaf Ebrahimi return TEST_SKIP;
1364*6467f958SSadaf Ebrahimi }
1365*6467f958SSadaf Ebrahimi return TEST_FAIL;
1366*6467f958SSadaf Ebrahimi }
1367*6467f958SSadaf Ebrahimi
checkForImageSupport(cl_device_id device)1368*6467f958SSadaf Ebrahimi int checkForImageSupport(cl_device_id device)
1369*6467f958SSadaf Ebrahimi {
1370*6467f958SSadaf Ebrahimi cl_uint i;
1371*6467f958SSadaf Ebrahimi int error;
1372*6467f958SSadaf Ebrahimi
1373*6467f958SSadaf Ebrahimi
1374*6467f958SSadaf Ebrahimi /* Check the device props to see if images are supported at all first */
1375*6467f958SSadaf Ebrahimi error =
1376*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1377*6467f958SSadaf Ebrahimi test_error(error, "Unable to query device for image support");
1378*6467f958SSadaf Ebrahimi if (i == 0)
1379*6467f958SSadaf Ebrahimi {
1380*6467f958SSadaf Ebrahimi return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1381*6467f958SSadaf Ebrahimi }
1382*6467f958SSadaf Ebrahimi
1383*6467f958SSadaf Ebrahimi /* So our support is good */
1384*6467f958SSadaf Ebrahimi return 0;
1385*6467f958SSadaf Ebrahimi }
1386*6467f958SSadaf Ebrahimi
checkFor3DImageSupport(cl_device_id device)1387*6467f958SSadaf Ebrahimi int checkFor3DImageSupport(cl_device_id device)
1388*6467f958SSadaf Ebrahimi {
1389*6467f958SSadaf Ebrahimi cl_uint i;
1390*6467f958SSadaf Ebrahimi int error;
1391*6467f958SSadaf Ebrahimi
1392*6467f958SSadaf Ebrahimi /* Check the device props to see if images are supported at all first */
1393*6467f958SSadaf Ebrahimi error =
1394*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1395*6467f958SSadaf Ebrahimi test_error(error, "Unable to query device for image support");
1396*6467f958SSadaf Ebrahimi if (i == 0)
1397*6467f958SSadaf Ebrahimi {
1398*6467f958SSadaf Ebrahimi return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1399*6467f958SSadaf Ebrahimi }
1400*6467f958SSadaf Ebrahimi
1401*6467f958SSadaf Ebrahimi char profile[128];
1402*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
1403*6467f958SSadaf Ebrahimi NULL);
1404*6467f958SSadaf Ebrahimi test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
1405*6467f958SSadaf Ebrahimi if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
1406*6467f958SSadaf Ebrahimi {
1407*6467f958SSadaf Ebrahimi size_t width = -1L;
1408*6467f958SSadaf Ebrahimi size_t height = -1L;
1409*6467f958SSadaf Ebrahimi size_t depth = -1L;
1410*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
1411*6467f958SSadaf Ebrahimi sizeof(width), &width, NULL);
1412*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
1413*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
1414*6467f958SSadaf Ebrahimi sizeof(height), &height, NULL);
1415*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
1416*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
1417*6467f958SSadaf Ebrahimi sizeof(depth), &depth, NULL);
1418*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
1419*6467f958SSadaf Ebrahimi
1420*6467f958SSadaf Ebrahimi if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1421*6467f958SSadaf Ebrahimi }
1422*6467f958SSadaf Ebrahimi
1423*6467f958SSadaf Ebrahimi /* So our support is good */
1424*6467f958SSadaf Ebrahimi return 0;
1425*6467f958SSadaf Ebrahimi }
1426*6467f958SSadaf Ebrahimi
checkForReadWriteImageSupport(cl_device_id device)1427*6467f958SSadaf Ebrahimi int checkForReadWriteImageSupport(cl_device_id device)
1428*6467f958SSadaf Ebrahimi {
1429*6467f958SSadaf Ebrahimi if (checkForImageSupport(device))
1430*6467f958SSadaf Ebrahimi {
1431*6467f958SSadaf Ebrahimi return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1432*6467f958SSadaf Ebrahimi }
1433*6467f958SSadaf Ebrahimi
1434*6467f958SSadaf Ebrahimi auto device_cl_version = get_device_cl_version(device);
1435*6467f958SSadaf Ebrahimi if (device_cl_version >= Version(3, 0))
1436*6467f958SSadaf Ebrahimi {
1437*6467f958SSadaf Ebrahimi // In OpenCL 3.0, Read-Write images are optional.
1438*6467f958SSadaf Ebrahimi // Check if they are supported.
1439*6467f958SSadaf Ebrahimi cl_uint are_rw_images_supported{};
1440*6467f958SSadaf Ebrahimi test_error(
1441*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
1442*6467f958SSadaf Ebrahimi sizeof(are_rw_images_supported),
1443*6467f958SSadaf Ebrahimi &are_rw_images_supported, nullptr),
1444*6467f958SSadaf Ebrahimi "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
1445*6467f958SSadaf Ebrahimi if (0 == are_rw_images_supported)
1446*6467f958SSadaf Ebrahimi {
1447*6467f958SSadaf Ebrahimi log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
1448*6467f958SSadaf Ebrahimi return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1449*6467f958SSadaf Ebrahimi }
1450*6467f958SSadaf Ebrahimi }
1451*6467f958SSadaf Ebrahimi // READ_WRITE images are not supported on 1.X devices.
1452*6467f958SSadaf Ebrahimi else if (device_cl_version < Version(2, 0))
1453*6467f958SSadaf Ebrahimi {
1454*6467f958SSadaf Ebrahimi log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
1455*6467f958SSadaf Ebrahimi return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1456*6467f958SSadaf Ebrahimi }
1457*6467f958SSadaf Ebrahimi // Support for read-write image arguments is required
1458*6467f958SSadaf Ebrahimi // for an 2.X device if the device supports images.
1459*6467f958SSadaf Ebrahimi
1460*6467f958SSadaf Ebrahimi /* So our support is good */
1461*6467f958SSadaf Ebrahimi return 0;
1462*6467f958SSadaf Ebrahimi }
1463*6467f958SSadaf Ebrahimi
get_min_alignment(cl_context context)1464*6467f958SSadaf Ebrahimi size_t get_min_alignment(cl_context context)
1465*6467f958SSadaf Ebrahimi {
1466*6467f958SSadaf Ebrahimi static cl_uint align_size = 0;
1467*6467f958SSadaf Ebrahimi
1468*6467f958SSadaf Ebrahimi if (0 == align_size)
1469*6467f958SSadaf Ebrahimi {
1470*6467f958SSadaf Ebrahimi cl_device_id *devices;
1471*6467f958SSadaf Ebrahimi size_t devices_size = 0;
1472*6467f958SSadaf Ebrahimi cl_uint result = 0;
1473*6467f958SSadaf Ebrahimi cl_int error;
1474*6467f958SSadaf Ebrahimi int i;
1475*6467f958SSadaf Ebrahimi
1476*6467f958SSadaf Ebrahimi error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1477*6467f958SSadaf Ebrahimi &devices_size);
1478*6467f958SSadaf Ebrahimi test_error_ret(error, "clGetContextInfo failed", 0);
1479*6467f958SSadaf Ebrahimi
1480*6467f958SSadaf Ebrahimi devices = (cl_device_id *)malloc(devices_size);
1481*6467f958SSadaf Ebrahimi if (devices == NULL)
1482*6467f958SSadaf Ebrahimi {
1483*6467f958SSadaf Ebrahimi print_error(error, "malloc failed");
1484*6467f958SSadaf Ebrahimi return 0;
1485*6467f958SSadaf Ebrahimi }
1486*6467f958SSadaf Ebrahimi
1487*6467f958SSadaf Ebrahimi error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
1488*6467f958SSadaf Ebrahimi (void *)devices, NULL);
1489*6467f958SSadaf Ebrahimi test_error_ret(error, "clGetContextInfo failed", 0);
1490*6467f958SSadaf Ebrahimi
1491*6467f958SSadaf Ebrahimi for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
1492*6467f958SSadaf Ebrahimi {
1493*6467f958SSadaf Ebrahimi cl_uint alignment = 0;
1494*6467f958SSadaf Ebrahimi
1495*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1496*6467f958SSadaf Ebrahimi sizeof(cl_uint), (void *)&alignment, NULL);
1497*6467f958SSadaf Ebrahimi
1498*6467f958SSadaf Ebrahimi if (error == CL_SUCCESS)
1499*6467f958SSadaf Ebrahimi {
1500*6467f958SSadaf Ebrahimi alignment >>= 3; // convert bits to bytes
1501*6467f958SSadaf Ebrahimi result = (alignment > result) ? alignment : result;
1502*6467f958SSadaf Ebrahimi }
1503*6467f958SSadaf Ebrahimi else
1504*6467f958SSadaf Ebrahimi print_error(error, "clGetDeviceInfo failed");
1505*6467f958SSadaf Ebrahimi }
1506*6467f958SSadaf Ebrahimi
1507*6467f958SSadaf Ebrahimi align_size = result;
1508*6467f958SSadaf Ebrahimi free(devices);
1509*6467f958SSadaf Ebrahimi }
1510*6467f958SSadaf Ebrahimi
1511*6467f958SSadaf Ebrahimi return align_size;
1512*6467f958SSadaf Ebrahimi }
1513*6467f958SSadaf Ebrahimi
get_default_rounding_mode(cl_device_id device,const cl_uint & param)1514*6467f958SSadaf Ebrahimi cl_device_fp_config get_default_rounding_mode(cl_device_id device,
1515*6467f958SSadaf Ebrahimi const cl_uint ¶m)
1516*6467f958SSadaf Ebrahimi {
1517*6467f958SSadaf Ebrahimi if (param == CL_DEVICE_DOUBLE_FP_CONFIG)
1518*6467f958SSadaf Ebrahimi test_error_ret(
1519*6467f958SSadaf Ebrahimi -1,
1520*6467f958SSadaf Ebrahimi "FAILURE: CL_DEVICE_DOUBLE_FP_CONFIG not supported by this routine",
1521*6467f958SSadaf Ebrahimi 0);
1522*6467f958SSadaf Ebrahimi
1523*6467f958SSadaf Ebrahimi char profileStr[128] = "";
1524*6467f958SSadaf Ebrahimi cl_device_fp_config single = 0;
1525*6467f958SSadaf Ebrahimi int error = clGetDeviceInfo(device, param, sizeof(single), &single, NULL);
1526*6467f958SSadaf Ebrahimi if (error)
1527*6467f958SSadaf Ebrahimi {
1528*6467f958SSadaf Ebrahimi std::string message = std::string("Unable to get device ")
1529*6467f958SSadaf Ebrahimi + std::string(param == CL_DEVICE_HALF_FP_CONFIG
1530*6467f958SSadaf Ebrahimi ? "CL_DEVICE_HALF_FP_CONFIG"
1531*6467f958SSadaf Ebrahimi : "CL_DEVICE_SINGLE_FP_CONFIG");
1532*6467f958SSadaf Ebrahimi test_error_ret(error, message.c_str(), 0);
1533*6467f958SSadaf Ebrahimi }
1534*6467f958SSadaf Ebrahimi
1535*6467f958SSadaf Ebrahimi if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
1536*6467f958SSadaf Ebrahimi
1537*6467f958SSadaf Ebrahimi if (0 == (single & CL_FP_ROUND_TO_ZERO))
1538*6467f958SSadaf Ebrahimi test_error_ret(-1,
1539*6467f958SSadaf Ebrahimi "FAILURE: device must support either "
1540*6467f958SSadaf Ebrahimi "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
1541*6467f958SSadaf Ebrahimi 0);
1542*6467f958SSadaf Ebrahimi
1543*6467f958SSadaf Ebrahimi // Make sure we are an embedded device before allowing a pass
1544*6467f958SSadaf Ebrahimi if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
1545*6467f958SSadaf Ebrahimi &profileStr, NULL)))
1546*6467f958SSadaf Ebrahimi test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
1547*6467f958SSadaf Ebrahimi
1548*6467f958SSadaf Ebrahimi if (strcmp(profileStr, "EMBEDDED_PROFILE"))
1549*6467f958SSadaf Ebrahimi test_error_ret(error,
1550*6467f958SSadaf Ebrahimi "FAILURE: non-EMBEDDED_PROFILE devices must support "
1551*6467f958SSadaf Ebrahimi "CL_FP_ROUND_TO_NEAREST",
1552*6467f958SSadaf Ebrahimi 0);
1553*6467f958SSadaf Ebrahimi
1554*6467f958SSadaf Ebrahimi return CL_FP_ROUND_TO_ZERO;
1555*6467f958SSadaf Ebrahimi }
1556*6467f958SSadaf Ebrahimi
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1557*6467f958SSadaf Ebrahimi int checkDeviceForQueueSupport(cl_device_id device,
1558*6467f958SSadaf Ebrahimi cl_command_queue_properties prop)
1559*6467f958SSadaf Ebrahimi {
1560*6467f958SSadaf Ebrahimi cl_command_queue_properties realProps;
1561*6467f958SSadaf Ebrahimi cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
1562*6467f958SSadaf Ebrahimi sizeof(realProps), &realProps, NULL);
1563*6467f958SSadaf Ebrahimi test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
1564*6467f958SSadaf Ebrahimi
1565*6467f958SSadaf Ebrahimi return (realProps & prop) ? 1 : 0;
1566*6467f958SSadaf Ebrahimi }
1567*6467f958SSadaf Ebrahimi
printDeviceHeader(cl_device_id device)1568*6467f958SSadaf Ebrahimi int printDeviceHeader(cl_device_id device)
1569*6467f958SSadaf Ebrahimi {
1570*6467f958SSadaf Ebrahimi char deviceName[512], deviceVendor[512], deviceVersion[512],
1571*6467f958SSadaf Ebrahimi cLangVersion[512];
1572*6467f958SSadaf Ebrahimi int error;
1573*6467f958SSadaf Ebrahimi
1574*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
1575*6467f958SSadaf Ebrahimi deviceName, NULL);
1576*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_NAME for device");
1577*6467f958SSadaf Ebrahimi
1578*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
1579*6467f958SSadaf Ebrahimi deviceVendor, NULL);
1580*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
1581*6467f958SSadaf Ebrahimi
1582*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
1583*6467f958SSadaf Ebrahimi deviceVersion, NULL);
1584*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_VERSION for device");
1585*6467f958SSadaf Ebrahimi
1586*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1587*6467f958SSadaf Ebrahimi sizeof(cLangVersion), cLangVersion, NULL);
1588*6467f958SSadaf Ebrahimi test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
1589*6467f958SSadaf Ebrahimi
1590*6467f958SSadaf Ebrahimi log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
1591*6467f958SSadaf Ebrahimi "Device Version = %s%s%s\n",
1592*6467f958SSadaf Ebrahimi deviceName, deviceVendor, deviceVersion,
1593*6467f958SSadaf Ebrahimi (error == CL_SUCCESS) ? ", CL C Version = " : "",
1594*6467f958SSadaf Ebrahimi (error == CL_SUCCESS) ? cLangVersion : "");
1595*6467f958SSadaf Ebrahimi
1596*6467f958SSadaf Ebrahimi auto version = get_device_cl_version(device);
1597*6467f958SSadaf Ebrahimi if (version >= Version(3, 0))
1598*6467f958SSadaf Ebrahimi {
1599*6467f958SSadaf Ebrahimi auto ctsVersion = get_device_info_string(
1600*6467f958SSadaf Ebrahimi device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
1601*6467f958SSadaf Ebrahimi log_info("Device latest conformance version passed: %s\n",
1602*6467f958SSadaf Ebrahimi ctsVersion.c_str());
1603*6467f958SSadaf Ebrahimi }
1604*6467f958SSadaf Ebrahimi
1605*6467f958SSadaf Ebrahimi return CL_SUCCESS;
1606*6467f958SSadaf Ebrahimi }
1607*6467f958SSadaf Ebrahimi
get_device_cl_c_version(cl_device_id device)1608*6467f958SSadaf Ebrahimi Version get_device_cl_c_version(cl_device_id device)
1609*6467f958SSadaf Ebrahimi {
1610*6467f958SSadaf Ebrahimi auto device_cl_version = get_device_cl_version(device);
1611*6467f958SSadaf Ebrahimi
1612*6467f958SSadaf Ebrahimi // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
1613*6467f958SSadaf Ebrahimi // did not exist, but since this is just the first version we can
1614*6467f958SSadaf Ebrahimi // return 1.0.
1615*6467f958SSadaf Ebrahimi if (device_cl_version == Version{ 1, 0 })
1616*6467f958SSadaf Ebrahimi {
1617*6467f958SSadaf Ebrahimi return Version{ 1, 0 };
1618*6467f958SSadaf Ebrahimi }
1619*6467f958SSadaf Ebrahimi
1620*6467f958SSadaf Ebrahimi // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
1621*6467f958SSadaf Ebrahimi // versions are backwards compatible, hence querying with the
1622*6467f958SSadaf Ebrahimi // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
1623*6467f958SSadaf Ebrahimi // OpenCL C version.
1624*6467f958SSadaf Ebrahimi size_t opencl_c_version_size_in_bytes{};
1625*6467f958SSadaf Ebrahimi auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
1626*6467f958SSadaf Ebrahimi &opencl_c_version_size_in_bytes);
1627*6467f958SSadaf Ebrahimi test_error_ret(error,
1628*6467f958SSadaf Ebrahimi "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1629*6467f958SSadaf Ebrahimi (Version{ -1, 0 }));
1630*6467f958SSadaf Ebrahimi
1631*6467f958SSadaf Ebrahimi std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
1632*6467f958SSadaf Ebrahimi error =
1633*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1634*6467f958SSadaf Ebrahimi opencl_c_version.size(), &opencl_c_version[0], nullptr);
1635*6467f958SSadaf Ebrahimi
1636*6467f958SSadaf Ebrahimi test_error_ret(error,
1637*6467f958SSadaf Ebrahimi "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1638*6467f958SSadaf Ebrahimi (Version{ -1, 0 }));
1639*6467f958SSadaf Ebrahimi
1640*6467f958SSadaf Ebrahimi // Scrape out the major, minor pair from the string.
1641*6467f958SSadaf Ebrahimi auto major = opencl_c_version[opencl_c_version.find('.') - 1];
1642*6467f958SSadaf Ebrahimi auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
1643*6467f958SSadaf Ebrahimi
1644*6467f958SSadaf Ebrahimi return Version{ major - '0', minor - '0' };
1645*6467f958SSadaf Ebrahimi }
1646*6467f958SSadaf Ebrahimi
get_device_latest_cl_c_version(cl_device_id device)1647*6467f958SSadaf Ebrahimi Version get_device_latest_cl_c_version(cl_device_id device)
1648*6467f958SSadaf Ebrahimi {
1649*6467f958SSadaf Ebrahimi auto device_cl_version = get_device_cl_version(device);
1650*6467f958SSadaf Ebrahimi
1651*6467f958SSadaf Ebrahimi // If the device version >= 3.0 it must support the
1652*6467f958SSadaf Ebrahimi // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
1653*6467f958SSadaf Ebrahimi // recent CL C version supported by the device.
1654*6467f958SSadaf Ebrahimi if (device_cl_version >= Version{ 3, 0 })
1655*6467f958SSadaf Ebrahimi {
1656*6467f958SSadaf Ebrahimi size_t opencl_c_all_versions_size_in_bytes{};
1657*6467f958SSadaf Ebrahimi auto error =
1658*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1659*6467f958SSadaf Ebrahimi &opencl_c_all_versions_size_in_bytes);
1660*6467f958SSadaf Ebrahimi test_error_ret(
1661*6467f958SSadaf Ebrahimi error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1662*6467f958SSadaf Ebrahimi (Version{ -1, 0 }));
1663*6467f958SSadaf Ebrahimi std::vector<cl_name_version> name_versions(
1664*6467f958SSadaf Ebrahimi opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1665*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1666*6467f958SSadaf Ebrahimi opencl_c_all_versions_size_in_bytes,
1667*6467f958SSadaf Ebrahimi name_versions.data(), nullptr);
1668*6467f958SSadaf Ebrahimi test_error_ret(
1669*6467f958SSadaf Ebrahimi error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1670*6467f958SSadaf Ebrahimi (Version{ -1, 0 }));
1671*6467f958SSadaf Ebrahimi
1672*6467f958SSadaf Ebrahimi Version max_supported_cl_c_version{};
1673*6467f958SSadaf Ebrahimi for (const auto &name_version : name_versions)
1674*6467f958SSadaf Ebrahimi {
1675*6467f958SSadaf Ebrahimi Version current_version{
1676*6467f958SSadaf Ebrahimi static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1677*6467f958SSadaf Ebrahimi static_cast<int>(CL_VERSION_MINOR(name_version.version))
1678*6467f958SSadaf Ebrahimi };
1679*6467f958SSadaf Ebrahimi max_supported_cl_c_version =
1680*6467f958SSadaf Ebrahimi (current_version > max_supported_cl_c_version)
1681*6467f958SSadaf Ebrahimi ? current_version
1682*6467f958SSadaf Ebrahimi : max_supported_cl_c_version;
1683*6467f958SSadaf Ebrahimi }
1684*6467f958SSadaf Ebrahimi return max_supported_cl_c_version;
1685*6467f958SSadaf Ebrahimi }
1686*6467f958SSadaf Ebrahimi
1687*6467f958SSadaf Ebrahimi return get_device_cl_c_version(device);
1688*6467f958SSadaf Ebrahimi }
1689*6467f958SSadaf Ebrahimi
get_max_OpenCL_C_for_context(cl_context context)1690*6467f958SSadaf Ebrahimi Version get_max_OpenCL_C_for_context(cl_context context)
1691*6467f958SSadaf Ebrahimi {
1692*6467f958SSadaf Ebrahimi // Get all the devices in the context and find the maximum
1693*6467f958SSadaf Ebrahimi // universally supported OpenCL C version.
1694*6467f958SSadaf Ebrahimi size_t devices_size_in_bytes{};
1695*6467f958SSadaf Ebrahimi auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
1696*6467f958SSadaf Ebrahimi &devices_size_in_bytes);
1697*6467f958SSadaf Ebrahimi test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
1698*6467f958SSadaf Ebrahimi (Version{ -1, 0 }));
1699*6467f958SSadaf Ebrahimi std::vector<cl_device_id> devices(devices_size_in_bytes
1700*6467f958SSadaf Ebrahimi / sizeof(cl_device_id));
1701*6467f958SSadaf Ebrahimi error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
1702*6467f958SSadaf Ebrahimi devices.data(), nullptr);
1703*6467f958SSadaf Ebrahimi auto current_version = get_device_latest_cl_c_version(devices[0]);
1704*6467f958SSadaf Ebrahimi std::for_each(std::next(devices.begin()), devices.end(),
1705*6467f958SSadaf Ebrahimi [¤t_version](cl_device_id device) {
1706*6467f958SSadaf Ebrahimi auto device_version =
1707*6467f958SSadaf Ebrahimi get_device_latest_cl_c_version(device);
1708*6467f958SSadaf Ebrahimi // OpenCL 3.0 is not backwards compatible with 2.0.
1709*6467f958SSadaf Ebrahimi // If we have 3.0 and 2.0 in the same driver we
1710*6467f958SSadaf Ebrahimi // use 1.2.
1711*6467f958SSadaf Ebrahimi if (((device_version >= Version(2, 0)
1712*6467f958SSadaf Ebrahimi && device_version < Version(3, 0))
1713*6467f958SSadaf Ebrahimi && current_version >= Version(3, 0))
1714*6467f958SSadaf Ebrahimi || (device_version >= Version(3, 0)
1715*6467f958SSadaf Ebrahimi && (current_version >= Version(2, 0)
1716*6467f958SSadaf Ebrahimi && current_version < Version(3, 0))))
1717*6467f958SSadaf Ebrahimi {
1718*6467f958SSadaf Ebrahimi current_version = Version(1, 2);
1719*6467f958SSadaf Ebrahimi }
1720*6467f958SSadaf Ebrahimi else
1721*6467f958SSadaf Ebrahimi {
1722*6467f958SSadaf Ebrahimi current_version =
1723*6467f958SSadaf Ebrahimi std::min(device_version, current_version);
1724*6467f958SSadaf Ebrahimi }
1725*6467f958SSadaf Ebrahimi });
1726*6467f958SSadaf Ebrahimi return current_version;
1727*6467f958SSadaf Ebrahimi }
1728*6467f958SSadaf Ebrahimi
device_supports_cl_c_version(cl_device_id device,Version version)1729*6467f958SSadaf Ebrahimi bool device_supports_cl_c_version(cl_device_id device, Version version)
1730*6467f958SSadaf Ebrahimi {
1731*6467f958SSadaf Ebrahimi auto device_cl_version = get_device_cl_version(device);
1732*6467f958SSadaf Ebrahimi
1733*6467f958SSadaf Ebrahimi // In general, a device does not support an OpenCL C version if it is <=
1734*6467f958SSadaf Ebrahimi // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
1735*6467f958SSadaf Ebrahimi // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
1736*6467f958SSadaf Ebrahimi
1737*6467f958SSadaf Ebrahimi // If the device version >= 3.0 it must support the
1738*6467f958SSadaf Ebrahimi // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
1739*6467f958SSadaf Ebrahimi // used must appear in the query result if it's <=
1740*6467f958SSadaf Ebrahimi // CL_DEVICE_OPENCL_C_VERSION.
1741*6467f958SSadaf Ebrahimi if (device_cl_version >= Version{ 3, 0 })
1742*6467f958SSadaf Ebrahimi {
1743*6467f958SSadaf Ebrahimi size_t opencl_c_all_versions_size_in_bytes{};
1744*6467f958SSadaf Ebrahimi auto error =
1745*6467f958SSadaf Ebrahimi clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1746*6467f958SSadaf Ebrahimi &opencl_c_all_versions_size_in_bytes);
1747*6467f958SSadaf Ebrahimi test_error_ret(
1748*6467f958SSadaf Ebrahimi error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1749*6467f958SSadaf Ebrahimi (false));
1750*6467f958SSadaf Ebrahimi std::vector<cl_name_version> name_versions(
1751*6467f958SSadaf Ebrahimi opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1752*6467f958SSadaf Ebrahimi error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1753*6467f958SSadaf Ebrahimi opencl_c_all_versions_size_in_bytes,
1754*6467f958SSadaf Ebrahimi name_versions.data(), nullptr);
1755*6467f958SSadaf Ebrahimi test_error_ret(
1756*6467f958SSadaf Ebrahimi error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1757*6467f958SSadaf Ebrahimi (false));
1758*6467f958SSadaf Ebrahimi
1759*6467f958SSadaf Ebrahimi for (const auto &name_version : name_versions)
1760*6467f958SSadaf Ebrahimi {
1761*6467f958SSadaf Ebrahimi Version current_version{
1762*6467f958SSadaf Ebrahimi static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1763*6467f958SSadaf Ebrahimi static_cast<int>(CL_VERSION_MINOR(name_version.version))
1764*6467f958SSadaf Ebrahimi };
1765*6467f958SSadaf Ebrahimi if (current_version == version)
1766*6467f958SSadaf Ebrahimi {
1767*6467f958SSadaf Ebrahimi return true;
1768*6467f958SSadaf Ebrahimi }
1769*6467f958SSadaf Ebrahimi }
1770*6467f958SSadaf Ebrahimi }
1771*6467f958SSadaf Ebrahimi
1772*6467f958SSadaf Ebrahimi return version <= get_device_cl_c_version(device);
1773*6467f958SSadaf Ebrahimi }
1774*6467f958SSadaf Ebrahimi
// Calls `fn` repeatedly until it returns true or the time budget runs out.
//
// timeout_ms   total budget; 0 means `fn` is never called and false is
//              returned.
// interval_ms  pause between attempts; the budget is charged this amount per
//              failed attempt. A value of 0 is treated as 1 ms so that the
//              budget always shrinks and the loop is guaranteed to end.
// fn           predicate to poll.
//
// Returns true as soon as `fn` succeeds, false if the budget is exhausted.
// Elapsed time is accounted from the requested interval, not measured with a
// clock, so the time spent inside `fn` itself is not counted.
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    // With a zero step the budget would never decrease and a predicate that
    // keeps failing would spin forever.
    const unsigned step_ms = std::max(interval_ms, 1u);

    // Count down instead of up: subtracting at most what is left cannot wrap,
    // whereas accumulating intervals could overflow past timeout_ms.
    unsigned remaining_ms = timeout_ms;

    while (remaining_ms > 0)
    {
        if (fn())
        {
            return true;
        }
        usleep(step_ms * 1000);
        remaining_ms -= std::min(step_ms, remaining_ms);
    }

    return false;
}
1794*6467f958SSadaf Ebrahimi
device_supports_double(cl_device_id device)1795*6467f958SSadaf Ebrahimi bool device_supports_double(cl_device_id device)
1796*6467f958SSadaf Ebrahimi {
1797*6467f958SSadaf Ebrahimi if (is_extension_available(device, "cl_khr_fp64"))
1798*6467f958SSadaf Ebrahimi {
1799*6467f958SSadaf Ebrahimi return true;
1800*6467f958SSadaf Ebrahimi }
1801*6467f958SSadaf Ebrahimi else
1802*6467f958SSadaf Ebrahimi {
1803*6467f958SSadaf Ebrahimi cl_device_fp_config double_fp_config;
1804*6467f958SSadaf Ebrahimi cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
1805*6467f958SSadaf Ebrahimi sizeof(double_fp_config),
1806*6467f958SSadaf Ebrahimi &double_fp_config, nullptr);
1807*6467f958SSadaf Ebrahimi test_error(err,
1808*6467f958SSadaf Ebrahimi "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
1809*6467f958SSadaf Ebrahimi return double_fp_config != 0;
1810*6467f958SSadaf Ebrahimi }
1811*6467f958SSadaf Ebrahimi }
1812*6467f958SSadaf Ebrahimi
device_supports_half(cl_device_id device)1813*6467f958SSadaf Ebrahimi bool device_supports_half(cl_device_id device)
1814*6467f958SSadaf Ebrahimi {
1815*6467f958SSadaf Ebrahimi return is_extension_available(device, "cl_khr_fp16");
1816*6467f958SSadaf Ebrahimi }
1817