//
// Copyright © 2020, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <log/log.h>

#include <fcntl.h>    // fcntl, F_GETFL, O_ACCMODE (cache fd validation below)
#include <sys/stat.h> // fstat
#include <unistd.h>   // write, pread

#include <cfloat>     // FLT_MAX
#include <chrono>
namespace
{
const char *g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage  = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_ifPerformanceExecTime                         = "ArmNN.ifPerformance.execTime";
const char *g_ifPerformancePowerUsage                       = "ArmNN.ifPerformance.powerUsage";

const char *g_whilePerformanceExecTime                      = "ArmNN.whilePerformance.execTime";
const char *g_whilePerformancePowerUsage                    = "ArmNN.whilePerformance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime   = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime         = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage       = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime   = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime         = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage       = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime     = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage   = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime           = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage         = "Armnn.operandTypeInt32Performance.powerUsage";

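// Note: the keys above are read via ParseSystemProperty() in getCapabilities_1_3()
// below, so the reported performance numbers can be tuned through Android system
// properties without rebuilding the driver. A hypothetical override on a rooted
// device might look like:
//   adb shell setprop Armnn.operandTypeTensorFloat32Performance.execTime 2.0
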
void NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback>& callback,
                            V1_3::ErrorStatus errorStatus,
                            const android::sp<V1_3::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
    // This check is required; if the callback fails and the result isn't checked,
    // it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
              returned.description().c_str());
    }
}

Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const android::sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_3
{

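// prepareArmnnModel_1_3: converts the HAL model to an armnn::INetwork, optionally
// serializes it, optimizes and loads it into the runtime, warms up GpuAcc with a
// dummy inference, then writes the data/model caches and notifies the callback.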
Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
       const armnn::IRuntimePtr& runtime,
       const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
       const DriverOptions& options,
       const V1_3::Model& model,
       const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
       const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
       const HidlToken& token,
       const android::sp<V1_3::IPreparedModelCallback>& cb,
       bool float32ToFloat16,
       V1_3::Priority priority)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for,
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = dataCacheHandle.size() >= 1;
    auto serializedNetworkFileName =
        SerializeNetwork(*modelConverter.GetINetwork(),
                         options.GetRequestInputsAndOutputsDumpDir(),
                         dataCacheData,
                         serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

    unsigned int numberOfCachedModelFiles = 0;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles.
            // The modelCacheHandle vector should be in the same order as the backends.
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                if (modelCacheHandle[index]->numFds == 1)
                {
                    // For GpuAcc numberOfCacheFiles is 1
                    if (backend == armnn::Compute::GpuAcc)
                    {
                        cachedFd = modelCacheHandle[index]->data[0];
                        saveCachedNetwork = true;
                    }
                }
                // Advance by this backend's own cache-file count, not the running
                // total, so the next backend indexes its own handles (this matches
                // the equivalent loop in prepareModelFromCache_1_3 below).
                index += numberOfCacheFiles;
            }
        }
    }
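
    // Illustrative layout (an assumption for the common single-backend case):
    // with backends == { GpuAcc }, modelCacheHandle holds one handle whose single
    // fd becomes cachedFd, and saveCachedNetwork is forced on.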

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);
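
    // Both option sets are always added; a backend that isn't in
    // options.GetBackends() simply never consumes its model options.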

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());

    auto numInputs  = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that we can associate the graph file with the input/output tensor exported files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled(),
                    priority,
                    options.isAsyncModelExecutionEnabled(),
                    options.getNoOfArmnnThreads(),
                    options.isImportEnabled(),
                    options.isExportEnabled()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to the options.
    if (std::find(options.GetBackends().begin(),
                  options.GetBackends().end(),
                  armnn::Compute::GpuAcc) != options.GetBackends().end())
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference, the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                      options.GetClTunedParametersFile().c_str(), error.what());
            }
        }
    }
    size_t hashValue = 0;
    // Cache the model
    if (dataCacheHandle.size() > 0)
    {
        // Cache the Arm NN model
        if (dataCacheHandle.size() != 1)
        {
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        if (dataCacheHandle[0]->numFds != 1)
        {
            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        if (dataCacheHandle[0]->data[0] < 0)
        {
            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
        if (dataCacheFileAccessMode != O_RDWR)
        {
            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3(): Invalid Access Mode.");
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
    }
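
    // The hash of the serialized network computed above is registered against the
    // caller's token further down, and re-checked by prepareModelFromCache_1_3()
    // before a cached model is trusted.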

    // Cache the model data
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() != numberOfCachedModelFiles)
        {
            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
            return V1_3::ErrorStatus::NONE;
        }

        for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
        {
            if (modelCacheHandle[i]->numFds == 1)
            {
                int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
                        }
                    }
                }
            }
        }
    }
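
    // Each backend-written cache file folds into the combined hash via XOR, so the
    // registered value covers the data cache and every model cache stream.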
    if (hashValue != 0)
    {
        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
    }

    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
         (std::chrono::system_clock::now() - prepareModelTimepoint).count());

    return V1_3::ErrorStatus::NONE;
}

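// prepareModelFromCache_1_3: recreates a prepared model from the caches written by
// prepareArmnnModel_1_3 - it validates the token/hash, deserializes the Arm NN
// network from the data cache, then re-optimizes and loads it into the runtime.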
Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
    const armnn::IRuntimePtr& runtime,
    const DriverOptions& options,
    const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
    const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
    const HidlToken& token,
    const android::sp<V1_3::IPreparedModelCallback>& cb)
{
    ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
    std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();

    // Check the callback before anything else; every other error path below uses it.
    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    if (!runtime)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
        return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
    }

    // dataCacheHandle size should always be 1 (the serialized Arm NN model).
    if (dataCacheHandle.size() != 1)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    // Check that the number of cached model files matches the expected value.
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    if (dataCacheHandle[0]->numFds != 1)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    if (dataCacheHandle[0]->data[0] < 0)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
    if (dataSize == 0)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    int offset = 0;
    {
        struct stat statBuffer;
        if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
        {
            unsigned long bufferSize = statBuffer.st_size;
            if (bufferSize != dataSize)
            {
                ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
                cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
    }
    std::vector<uint8_t> dataCacheData(dataSize);
    pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);

    int gpuAccCachedFd = -1;
    bool saveCachedNetwork = false;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles.
            // The modelCacheHandle vector should be in the same order as the backends.
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (modelCacheHandle[index]->numFds != 1)
                {
                    ALOGW(
                       "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                    return V1_3::ErrorStatus::GENERAL_FAILURE;
                }
                auto cachedFd = modelCacheHandle[index]->data[0];

                int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDWR)
                {
                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                    return V1_3::ErrorStatus::GENERAL_FAILURE;
                }

                struct stat statBuffer;
                if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize <= 0)
                    {
                        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!");
                        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
                        return V1_3::ErrorStatus::GENERAL_FAILURE;
                    }
                    std::vector<uint8_t> modelData(modelDataSize);
                    pread(cachedFd, modelData.data(), modelData.size(), 0);
                    hashValue ^= CacheDataHandlerInstance().Hash(modelData);

                    // For GpuAcc numberOfCacheFiles is 1
                    if (backend == armnn::Compute::GpuAcc)
                    {
                        gpuAccCachedFd = cachedFd;
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }
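
    // gpuAccCachedFd feeds the "CachedFileDescriptor" backend option below, letting
    // GpuAcc reload its compiled-kernel cache instead of rebuilding it.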

    if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Validate() failed!");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    // Deserialize the network.
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!");
        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
        return V1_3::ErrorStatus::GENERAL_FAILURE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled());
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                         {"FastMathEnabled",       options.IsFastMathEnabled()},
                                         {"SaveCachedNetwork",     saveCachedNetwork},
                                         {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
                                         {"MLGOTuningFilePath",    options.GetClMLGOTunedParametersFile()},
                                         {"CachedFileDescriptor",  gpuAccCachedFd}
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                         {"FastMathEnabled", options.IsFastMathEnabled()},
                                         {"NumberOfThreads", options.GetNumberOfThreads()}
                                 });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());

    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
                                                           runtime.get(),
                                                           options.GetRequestInputsAndOutputsDumpDir(),
                                                           options.IsGpuProfilingEnabled(),
                                                           V1_3::Priority::MEDIUM,
                                                           options.isAsyncModelExecutionEnabled(),
                                                           options.getNoOfArmnnThreads(),
                                                           options.isImportEnabled(),
                                                           options.isExportEnabled(),
                                                           true));

    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    ALOGV("ArmnnDriverImpl::prepareModelFromCache timing = %lld µs",
          std::chrono::duration_cast<std::chrono::microseconds>
          (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());

    return V1_3::ErrorStatus::NONE;
}

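// getCapabilities_1_3: reports per-operand performance numbers, read from system
// properties when the runtime is available. Operand types not overridden below keep
// the FLT_MAX base value, i.e. they are reported as maximally expensive.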
Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities_1_3()");

    V1_3::Capabilities capabilities;

    float defaultValue = .1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.ifPerformance.execTime =
                ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);

        capabilities.ifPerformance.powerUsage =
                ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);

        capabilities.whilePerformance.execTime =
                ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);

        capabilities.whilePerformance.powerUsage =
                ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

        // Load supported operand types
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                                                    defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
                                                      defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
                {
                    .execTime =
                        ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
                    .powerUsage =
                        ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
                });

        cb(V1_3::ErrorStatus::NONE, capabilities);
    }
    else
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
        capabilities.ifPerformance.execTime      = 0;
        capabilities.ifPerformance.powerUsage    = 0;
        capabilities.whilePerformance.execTime   = 0;
        capabilities.whilePerformance.powerUsage = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.0f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_3
} // namespace armnn_driver