//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/ArmNN.hpp>

#if !defined(ARMNN_DISABLE_THREADS)
#include <armnn/Threadpool.hpp>
#include <common/include/IgnoreUnused.hpp>
#endif

#include <armnn/Logging.hpp>
#include <armnn/utility/Timer.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <armnnUtils/TContainer.hpp>
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <armnnUtils/Filesystem.hpp>
#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include "armnn/utility/StringUtils.hpp"
#include <cxxopts/cxxopts.hpp>
#include "CxxoptsUtils.hpp"
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <algorithm>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

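// Quantization parameters as a (scale, zero-point offset) pair.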
using QuantizationParams = std::pair<float, int32_t>;

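// Everything needed to parse, optimize and load a model: the file location,
// input/output bindings, target backends and assorted optimization switches.
// The constructor gives every switch a conservative default.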
struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_AllowExpandedDims;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_EnableBf16TurboMode;
    bool                            m_PrintIntermediateLayers;
    bool                            m_PrintIntermediateLayersToFile;
    bool                            m_ParseUnsupported;
    bool                            m_InferOutputShape;
    bool                            m_EnableFastMath;
    bool                            m_SaveCachedNetwork;
    bool                            m_OutputDetailsToStdOut;
    bool                            m_OutputDetailsOnlyToStdOut;
    std::string                     m_CachedNetworkFilePath;
    unsigned int                    m_NumberOfThreads;
    std::string                     m_MLGOTuningFilePath;
    bool                            m_AsyncEnabled;
    size_t                          m_ThreadPoolSize;
    bool                            m_ImportInputsIfAligned;

    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_AllowExpandedDims(false)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_EnableBf16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_PrintIntermediateLayersToFile(false)
        , m_ParseUnsupported(false)
        , m_InferOutputShape(false)
        , m_EnableFastMath(false)
        , m_SaveCachedNetwork(false)
        , m_OutputDetailsToStdOut(false)
        , m_OutputDetailsOnlyToStdOut(false)
        , m_CachedNetworkFilePath("")
        , m_NumberOfThreads(0)
        , m_MLGOTuningFilePath("")
        , m_AsyncEnabled(false)
        , m_ThreadPoolSize(0)
        , m_ImportInputsIfAligned(false)
    {}
};

} // namespace InferenceModelInternal

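// Parses a model file into an armnn::INetwork and fills in the input/output
// binding information. The primary template suits parsers whose Create*
// functions accept explicit input shapes and requested outputs; the
// specializations below adapt the deserializer, TfLite and ONNX interfaces.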
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
                                                   params.m_ModelPath,
                                                   errorCode.message(),
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_AllowExpandedDims          = params.m_AllowExpandedDims;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            {
                ARMNN_SCOPED_HEAP_PROFILING("Parsing");
                network = (params.m_IsModelBinary ?
                    parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                    parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
            }
        }
        else
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

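// Illustrative sketch (not part of the original header) of driving one of the
// CreateNetworkImpl specializations directly; the parser type, model path and
// binding names are assumptions for the example:
//
//   InferenceModelInternal::Params params;
//   params.m_ModelPath      = "model.tflite";   // hypothetical file
//   params.m_InputBindings  = { "input" };      // hypothetical layer name
//   params.m_OutputBindings = { "output" };     // hypothetical layer name
//
//   std::vector<armnn::BindingPointInfo> inputBindings;
//   std::vector<armnn::BindingPointInfo> outputBindings;
//   armnn::INetworkPtr network = CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>::Create(
//       params, inputBindings, outputBindings);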
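// Owns an armnn::IRuntime with one parsed, optimized and loaded network.
// Construction runs the whole parse/optimize/load pipeline; Run and RunAsync
// then execute inferences against the loaded network.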
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;

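    // Options that a test driver can populate directly from the command line;
    // see AddCommandLineOptions below.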
    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

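    // Registers this model's standard command-line options on the given
    // cxxopts parser. Option names pushed into 'required' (currently just
    // "model-dir") are expected to be verified by the caller after parsing.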
    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions, std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path from which to load any available dynamic backends. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image-filename/correct-label pair per line, "
                 "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
                ("fp16-turbo-mode",
                 "If this option is enabled, FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
                ("bf16-turbo-mode",
                 "If this option is enabled, FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }

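    // A minimal sketch (an assumption, not code from this header) of how a
    // driver might consume AddCommandLineOptions; the program name and
    // description are hypothetical:
    //
    //   cxxopts::Options options("ExampleApp", "Runs an inference test");
    //   CommandLineOptions cLineOptions;
    //   std::vector<std::string> required;
    //   InferenceModel<IParser, TDataType>::AddCommandLineOptions(options, cLineOptions, required);
    //   cxxopts::ParseResult result = options.parse(argc, argv);
    //   // ...the caller then checks that every option named in 'required'
    //   // was supplied before using cLineOptions.

    // Parses the model, optimizes it for the requested backends and loads it
    // onto the runtime. Throws armnn::Exception if any backend ID is invalid,
    // if optimization fails, or if the runtime cannot load the network.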
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined),
          m_DynamicBackendsPath(dynamicBackendsPath),
          m_ImportInputsIfAligned(params.m_ImportInputsIfAligned)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        // Configure the profiler if profiling details were requested
        if (params.m_OutputDetailsOnlyToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
        else if (params.m_OutputDetailsToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            const auto parsing_start_time = armnn::GetTimeNow();
            armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

            ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptionsOpaque options;
            options.SetReduceFp32ToFp16(params.m_EnableFp16TurboMode);
            options.SetDebugEnabled(params.m_PrintIntermediateLayers);
            options.SetDebugToFileEnabled(params.m_PrintIntermediateLayersToFile);
            options.SetShapeInferenceMethod(params.m_InferOutputShape ?
                    armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly);
            options.SetProfilingEnabled(m_EnableProfiling);

            armnn::BackendOptions gpuAcc("GpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "SaveCachedNetwork", params.m_SaveCachedNetwork },
                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
                { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
            });

            armnn::BackendOptions cpuAcc("CpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "NumberOfThreads", params.m_NumberOfThreads }
            });
            options.AddModelOption(gpuAcc);
            options.AddModelOption(cpuAcc);

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined,
                                                        enableProfiling,
                                                        m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";
#if !defined(ARMNN_DISABLE_THREADS)
            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }

                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
#endif
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

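    // Runs one synchronous inference and returns the wall-clock duration of
    // the EnqueueWorkload call. Each output container is size-checked first
    // and must hold at least GetOutputSize(i) elements.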
    std::chrono::duration<double, std::milli> Run(
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                            "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret;
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(
                m_NetworkIdentifier, MakeInputTensors(inputContainers), armnn::MemorySource::Malloc);

            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(
                m_NetworkIdentifier, MakeOutputTensors(outputContainers), armnn::MemorySource::Malloc);

            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers),
                                             importedInputIds,
                                             importedOutputIds);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers));
        }
        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return duration;
        }
    }

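    // Runs one inference through armnn::IRuntime::Execute using the supplied
    // working-memory handle; returns the inference ID paired with the
    // wall-clock duration of the Execute call.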
    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                            "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time of Execute (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));

        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        else
        {
            return std::make_tuple(inferenceID, duration);
        }
    }

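    // Schedules an inference on the internal thread pool at medium priority;
    // the result is delivered through the supplied callback. Compiles to a
    // no-op when ARMNN_DISABLE_THREADS is defined.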
    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
#if !defined(ARMNN_DISABLE_THREADS)
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                            "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
#endif
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

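    // Output quantization as (scale, offset); a real value is recovered from
    // a quantized one as real = scale * (quantized - offset).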
    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
#if !defined(ARMNN_DISABLE_THREADS)
    std::unique_ptr<armnn::Threadpool> m_Threadpool;
#endif

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    bool m_ImportInputsIfAligned;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};
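
// End-to-end usage sketch (illustrative only: the parser choice, model path
// and binding names are assumptions, and error handling is omitted):
//
//   using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//   Model::Params params;
//   params.m_ModelPath      = "model.tflite";         // hypothetical file
//   params.m_InputBindings  = { "input" };            // hypothetical name
//   params.m_OutputBindings = { "output" };           // hypothetical name
//   params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//   Model model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
//   std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
//   std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//   auto inferenceDuration = model.Run(inputs, outputs);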