xref: /aosp_15_r20/hardware/interfaces/neuralnetworks/aidl/utils/src/PreparedModel.cpp (revision 4d7e907c777eeecc4c5bd7cf640a754fac206ff7)
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "PreparedModel.h"
18 
19 #include "Burst.h"
20 #include "Callbacks.h"
21 #include "Conversions.h"
22 #include "Execution.h"
23 #include "ProtectCallback.h"
24 #include "Utils.h"
25 
26 #include <aidl/android/hardware/neuralnetworks/Request.h>
27 #include <android/binder_auto_utils.h>
28 #include <nnapi/IPreparedModel.h>
29 #include <nnapi/Result.h>
30 #include <nnapi/TypeUtils.h>
31 #include <nnapi/Types.h>
32 #include <nnapi/hal/CommonUtils.h>
33 
34 #include <memory>
35 #include <tuple>
36 #include <utility>
37 #include <vector>
38 
39 // See hardware/interfaces/neuralnetworks/utils/README.md for more information on AIDL interface
40 // lifetimes across processes and for protecting asynchronous calls across AIDL.
41 
42 namespace aidl::android::hardware::neuralnetworks::utils {
43 namespace {
44 
convertExecutionResults(const std::vector<OutputShape> & outputShapes,const Timing & timing)45 nn::GeneralResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> convertExecutionResults(
46         const std::vector<OutputShape>& outputShapes, const Timing& timing) {
47     return std::make_pair(NN_TRY(nn::convert(outputShapes)), NN_TRY(nn::convert(timing)));
48 }
49 
convertFencedExecutionResults(ErrorStatus status,const aidl_hal::Timing & timingLaunched,const aidl_hal::Timing & timingFenced)50 nn::GeneralResult<std::pair<nn::Timing, nn::Timing>> convertFencedExecutionResults(
51         ErrorStatus status, const aidl_hal::Timing& timingLaunched,
52         const aidl_hal::Timing& timingFenced) {
53     HANDLE_STATUS_AIDL(status) << "fenced execution callback info failed with " << toString(status);
54     return std::make_pair(NN_TRY(nn::convert(timingLaunched)), NN_TRY(nn::convert(timingFenced)));
55 }
56 
handleExecutionResult(const ExecutionResult & result,const hal::utils::RequestRelocation & relocation)57 nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> handleExecutionResult(
58         const ExecutionResult& result, const hal::utils::RequestRelocation& relocation) {
59     if (!result.outputSufficientSize) {
60         auto canonicalOutputShapes =
61                 nn::convert(result.outputShapes).value_or(std::vector<nn::OutputShape>{});
62         return NN_ERROR(nn::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, std::move(canonicalOutputShapes))
63                << "execution failed with " << nn::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
64     }
65     auto [outputShapes, timing] =
66             NN_TRY(convertExecutionResults(result.outputShapes, result.timing));
67 
68     if (relocation.output) {
69         relocation.output->flush();
70     }
71     return std::make_pair(std::move(outputShapes), timing);
72 }
73 
// Translates an AIDL FencedExecutionResult into a canonical (sync fence, info callback) pair.
// If the request's outputs were relocated into shared memory, this blocks until the fence
// signals so the outputs can be flushed back to the caller's buffers.
nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
handleFencedExecutionResult(const FencedExecutionResult& result,
                            const hal::utils::RequestRelocation& relocation) {
    // A sync fence fd of -1 means no fence was returned; treat it as already signaled.
    auto resultSyncFence = nn::SyncFence::createAsSignaled();
    if (result.syncFence.get() != -1) {
        // NOTE(review): .value() assumes SyncFence::create succeeds for a converted fd — confirm.
        resultSyncFence = nn::SyncFence::create(NN_TRY(nn::convert(result.syncFence))).value();
    }

    // Copy the callback shared_ptr so the lambda below can keep it alive.
    auto callback = result.callback;
    if (callback == nullptr) {
        return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE) << "callback is null";
    }

    // If computeFenced required the request memory to be moved into shared memory, block here until
    // the fenced execution has completed and flush the memory back.
    if (relocation.output) {
        const auto state = resultSyncFence.syncWait({});
        if (state != nn::SyncFence::FenceState::SIGNALED) {
            return NN_ERROR() << "syncWait failed with " << state;
        }
        relocation.output->flush();
    }

    // Create callback which can be used to retrieve the execution error status and timings.
    // The lambda captures `callback` by value (shared_ptr), keeping the AIDL object alive
    // until the caller queries the execution info.
    nn::ExecuteFencedInfoCallback resultCallback =
            [callback]() -> nn::GeneralResult<std::pair<nn::Timing, nn::Timing>> {
        ErrorStatus errorStatus;
        Timing timingLaunched;
        Timing timingFenced;
        const auto ret = callback->getExecutionInfo(&timingLaunched, &timingFenced, &errorStatus);
        HANDLE_ASTATUS(ret) << "fenced execution callback getExecutionInfo failed";
        return convertFencedExecutionResults(errorStatus, timingLaunched, timingFenced);
    };

    return std::make_pair(std::move(resultSyncFence), std::move(resultCallback));
}
110 
111 }  // namespace
112 
create(std::shared_ptr<aidl_hal::IPreparedModel> preparedModel,nn::Version featureLevel)113 nn::GeneralResult<std::shared_ptr<const PreparedModel>> PreparedModel::create(
114         std::shared_ptr<aidl_hal::IPreparedModel> preparedModel, nn::Version featureLevel) {
115     if (preparedModel == nullptr) {
116         return NN_ERROR()
117                << "aidl_hal::utils::PreparedModel::create must have non-null preparedModel";
118     }
119 
120     return std::make_shared<const PreparedModel>(PrivateConstructorTag{}, std::move(preparedModel),
121                                                  featureLevel);
122 }
123 
// Private constructor: reachable only via PreparedModel::create (which performs the null check),
// enforced by the PrivateConstructorTag parameter.
PreparedModel::PreparedModel(PrivateConstructorTag /*tag*/,
                             std::shared_ptr<aidl_hal::IPreparedModel> preparedModel,
                             nn::Version featureLevel)
    : kPreparedModel(std::move(preparedModel)), kFeatureLevel(featureLevel) {}
128 
// Runs a synchronous execution: stages the request for IPC, converts the canonical arguments to
// AIDL form, and delegates the actual driver call to executeInternal.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> PreparedModel::execute(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalTimePoint& deadline, const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& hints,
        const std::vector<nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const {
    // Move any pointer-based request memory into shared memory so it can cross the binder
    // boundary; `relocation` records how to copy data in/out of the staging buffers.
    std::optional<nn::Request> sharedRequestStorage;
    hal::utils::RequestRelocation relocation;
    const nn::Request& sharedRequest = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kDefaultRequestMemoryPadding,
            &sharedRequestStorage, &relocation));

    // Convert each canonical argument to its AIDL representation.
    const auto halRequest = NN_TRY(convert(sharedRequest));
    const auto halMeasure = NN_TRY(convert(measure));
    const auto halDeadline = NN_TRY(convert(deadline));
    const auto halLoopTimeout = NN_TRY(convert(loopTimeoutDuration));
    return executeInternal(halRequest, halMeasure, halDeadline, halLoopTimeout, hints,
                           extensionNameToPrefix, relocation);
}
148 
149 nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
executeInternal(const Request & request,bool measure,int64_t deadline,int64_t loopTimeoutDuration,const std::vector<nn::TokenValuePair> & hints,const std::vector<nn::ExtensionNameAndPrefix> & extensionNameToPrefix,const hal::utils::RequestRelocation & relocation) const150 PreparedModel::executeInternal(const Request& request, bool measure, int64_t deadline,
151                                int64_t loopTimeoutDuration,
152                                const std::vector<nn::TokenValuePair>& hints,
153                                const std::vector<nn::ExtensionNameAndPrefix>& extensionNameToPrefix,
154                                const hal::utils::RequestRelocation& relocation) const {
155     if (relocation.input) {
156         relocation.input->flush();
157     }
158 
159     ExecutionResult executionResult;
160     if (kFeatureLevel.level >= nn::Version::Level::FEATURE_LEVEL_8) {
161         auto aidlHints = NN_TRY(convert(hints));
162         auto aidlExtensionPrefix = NN_TRY(convert(extensionNameToPrefix));
163         const auto ret = kPreparedModel->executeSynchronouslyWithConfig(
164                 request,
165                 {measure, loopTimeoutDuration, std::move(aidlHints),
166                  std::move(aidlExtensionPrefix)},
167                 deadline, &executionResult);
168         HANDLE_ASTATUS(ret) << "executeSynchronouslyWithConfig failed";
169     } else {
170         const auto ret = kPreparedModel->executeSynchronously(
171                 request, measure, deadline, loopTimeoutDuration, &executionResult);
172         HANDLE_ASTATUS(ret) << "executeSynchronously failed";
173     }
174     return handleExecutionResult(executionResult, relocation);
175 }
176 
177 nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
executeFenced(const nn::Request & request,const std::vector<nn::SyncFence> & waitFor,nn::MeasureTiming measure,const nn::OptionalTimePoint & deadline,const nn::OptionalDuration & loopTimeoutDuration,const nn::OptionalDuration & timeoutDurationAfterFence,const std::vector<nn::TokenValuePair> & hints,const std::vector<nn::ExtensionNameAndPrefix> & extensionNameToPrefix) const178 PreparedModel::executeFenced(
179         const nn::Request& request, const std::vector<nn::SyncFence>& waitFor,
180         nn::MeasureTiming measure, const nn::OptionalTimePoint& deadline,
181         const nn::OptionalDuration& loopTimeoutDuration,
182         const nn::OptionalDuration& timeoutDurationAfterFence,
183         const std::vector<nn::TokenValuePair>& hints,
184         const std::vector<nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const {
185     // Ensure that request is ready for IPC.
186     std::optional<nn::Request> maybeRequestInShared;
187     hal::utils::RequestRelocation relocation;
188     const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
189             &request, nn::kDefaultRequestMemoryAlignment, nn::kDefaultRequestMemoryPadding,
190             &maybeRequestInShared, &relocation));
191 
192     const auto aidlRequest = NN_TRY(convert(requestInShared));
193     const auto aidlWaitFor = NN_TRY(convert(waitFor));
194     const auto aidlMeasure = NN_TRY(convert(measure));
195     const auto aidlDeadline = NN_TRY(convert(deadline));
196     const auto aidlLoopTimeoutDuration = NN_TRY(convert(loopTimeoutDuration));
197     const auto aidlTimeoutDurationAfterFence = NN_TRY(convert(timeoutDurationAfterFence));
198     return executeFencedInternal(aidlRequest, aidlWaitFor, aidlMeasure, aidlDeadline,
199                                  aidlLoopTimeoutDuration, aidlTimeoutDurationAfterFence, hints,
200                                  extensionNameToPrefix, relocation);
201 }
202 
203 nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
executeFencedInternal(const Request & request,const std::vector<ndk::ScopedFileDescriptor> & waitFor,bool measure,int64_t deadline,int64_t loopTimeoutDuration,int64_t timeoutDurationAfterFence,const std::vector<nn::TokenValuePair> & hints,const std::vector<nn::ExtensionNameAndPrefix> & extensionNameToPrefix,const hal::utils::RequestRelocation & relocation) const204 PreparedModel::executeFencedInternal(
205         const Request& request, const std::vector<ndk::ScopedFileDescriptor>& waitFor, bool measure,
206         int64_t deadline, int64_t loopTimeoutDuration, int64_t timeoutDurationAfterFence,
207         const std::vector<nn::TokenValuePair>& hints,
208         const std::vector<nn::ExtensionNameAndPrefix>& extensionNameToPrefix,
209         const hal::utils::RequestRelocation& relocation) const {
210     if (relocation.input) {
211         relocation.input->flush();
212     }
213 
214     FencedExecutionResult result;
215     if (kFeatureLevel.level >= nn::Version::Level::FEATURE_LEVEL_8) {
216         auto aidlHints = NN_TRY(convert(hints));
217         auto aidlExtensionPrefix = NN_TRY(convert(extensionNameToPrefix));
218         const auto ret = kPreparedModel->executeFencedWithConfig(
219                 request, waitFor,
220                 {measure, loopTimeoutDuration, std::move(aidlHints),
221                  std::move(aidlExtensionPrefix)},
222                 deadline, timeoutDurationAfterFence, &result);
223         HANDLE_ASTATUS(ret) << "executeFencedWithConfig failed";
224     } else {
225         const auto ret = kPreparedModel->executeFenced(request, waitFor, measure, deadline,
226                                                        loopTimeoutDuration,
227                                                        timeoutDurationAfterFence, &result);
228         HANDLE_ASTATUS(ret) << "executeFenced failed";
229     }
230     return handleFencedExecutionResult(result, relocation);
231 }
232 
// Creates a reusable execution object. Drivers at FEATURE_LEVEL_8+ provide one natively; for
// older drivers, an ExecutionWithCachedRequest wrapper holds the converted request instead.
nn::GeneralResult<nn::SharedExecution> PreparedModel::createReusableExecution(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& hints,
        const std::vector<nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const {
    // Move any pointer-based request memory into shared memory so it can cross the binder
    // boundary; `relocation` records how to copy data in/out of the staging buffers.
    std::optional<nn::Request> sharedRequestStorage;
    hal::utils::RequestRelocation relocation;
    const nn::Request& sharedRequest = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kDefaultRequestMemoryPadding,
            &sharedRequestStorage, &relocation));

    // Convert the canonical arguments to their AIDL representations.
    auto halRequest = NN_TRY(convert(sharedRequest));
    auto halMeasure = NN_TRY(convert(measure));
    auto halLoopTimeout = NN_TRY(convert(loopTimeoutDuration));

    if (kFeatureLevel.level >= nn::Version::Level::FEATURE_LEVEL_8) {
        std::shared_ptr<IExecution> halExecution;
        auto halHints = NN_TRY(convert(hints));
        auto halPrefix = NN_TRY(convert(extensionNameToPrefix));

        const auto status = kPreparedModel->createReusableExecution(
                halRequest,
                {halMeasure, halLoopTimeout, std::move(halHints), std::move(halPrefix)},
                &halExecution);
        HANDLE_ASTATUS(status) << "createReusableExecution failed";
        return Execution::create(std::move(halExecution), std::move(relocation));
    }

    // Pre-FEATURE_LEVEL_8 fallback: cache the converted request alongside this prepared model.
    return ExecutionWithCachedRequest::create(shared_from_this(), std::move(halRequest),
                                              std::move(relocation), halMeasure, halLoopTimeout);
}
267 
configureExecutionBurst() const268 nn::GeneralResult<nn::SharedBurst> PreparedModel::configureExecutionBurst() const {
269     std::shared_ptr<IBurst> burst;
270     const auto ret = kPreparedModel->configureExecutionBurst(&burst);
271     HANDLE_ASTATUS(ret) << "configureExecutionBurst failed";
272     return Burst::create(std::move(burst), kFeatureLevel);
273 }
274 
getUnderlyingResource() const275 std::any PreparedModel::getUnderlyingResource() const {
276     std::shared_ptr<aidl_hal::IPreparedModel> resource = kPreparedModel;
277     return resource;
278 }
279 
// Runs this reusable execution synchronously against the given deadline and converts the
// driver's result to canonical form.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Execution::compute(
        const nn::OptionalTimePoint& deadline) const {
    const auto halDeadline = NN_TRY(convert(deadline));

    // Make caller-side input data visible to the driver before launching the execution.
    if (kRelocation.input) {
        kRelocation.input->flush();
    }

    ExecutionResult executionResult;
    const auto status = kExecution->executeSynchronously(halDeadline, &executionResult);
    HANDLE_ASTATUS(status) << "executeSynchronously failed";
    return handleExecutionResult(executionResult, kRelocation);
}
293 
computeFenced(const std::vector<nn::SyncFence> & waitFor,const nn::OptionalTimePoint & deadline,const nn::OptionalDuration & timeoutDurationAfterFence) const294 nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> Execution::computeFenced(
295         const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline,
296         const nn::OptionalDuration& timeoutDurationAfterFence) const {
297     const auto aidlWaitFor = NN_TRY(convert(waitFor));
298     const auto aidlDeadline = NN_TRY(convert(deadline));
299     const auto aidlTimeoutDurationAfterFence = NN_TRY(convert(timeoutDurationAfterFence));
300 
301     if (kRelocation.input) {
302         kRelocation.input->flush();
303     }
304 
305     FencedExecutionResult result;
306     const auto ret = kExecution->executeFenced(aidlWaitFor, aidlDeadline,
307                                                aidlTimeoutDurationAfterFence, &result);
308     HANDLE_ASTATUS(ret) << "executeFenced failed";
309     return handleFencedExecutionResult(result, kRelocation);
310 }
311 
312 }  // namespace aidl::android::hardware::neuralnetworks::utils
313