xref: /aosp_15_r20/external/armnn/src/armnn/LoadedNetwork.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1  //
2  // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3  // SPDX-License-Identifier: MIT
4  //
5  #pragma once
6  
7  #include "Network.hpp"
8  #include "LayerFwd.hpp"
9  #include "Profiling.hpp"
10  
11  #include <armnn/Tensor.hpp>
12  
13  #include <armnn/backends/IBackendInternal.hpp>
14  #include <armnn/backends/IMemoryOptimizerStrategy.hpp>
15  #include <armnn/backends/Workload.hpp>
16  #include <armnn/backends/WorkloadFactory.hpp>
17  
18  #include <backendsCommon/DefaultAllocator.hpp>
19  #include <backendsCommon/MemoryManager.hpp>
20  #include <backendsCommon/TensorHandleFactoryRegistry.hpp>
21  #include <backendsCommon/memoryOptimizerStrategyLibrary/strategies/SingleAxisPriorityList.hpp>
22  
23  #include <client/include/IProfilingService.hpp>
24  #include <client/include/TimelineUtilityMethods.hpp>
25  
26  #include <common/include/LabelsAndEventClasses.hpp>
27  
28  #include <mutex>
29  #include <condition_variable>
30  #include <unordered_map>
31  
// Forward declarations in namespace cl. Nothing visible in this header refers to them;
// presumably they are kept for OpenCL-backed code that includes this file (TODO confirm).
namespace cl
{
class CommandQueue;
class Context;
class Device;
} // namespace cl
38  
39  namespace armnn
40  {
41  
42  class LoadedNetwork
43  {
44  public:
45      using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;
46  
~LoadedNetwork()47      ~LoadedNetwork()
48      {
49          FreeWorkingMemory();
50      }
51  
52      /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
53      /// overlapped Execution by calling this function from different threads.
54      std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
55  
56      TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
57      TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;
58  
59      std::vector<ImportedInputId> ImportInputs(const InputTensors& inputTensors,
60                                                MemorySource forceImportMemorySource = MemorySource::Undefined);
61      std::vector<ImportedOutputId> ImportOutputs(const OutputTensors& outputTensors,
62                                                  MemorySource forceImportMemorySource = MemorySource::Undefined);
63  
64      void ClearImportedInputs(const std::vector<ImportedInputId> inputIds);
65      void ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds);
66  
67      /// Single thread execution of the loaded network
68      Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors,
69                             std::vector<ImportedInputId> preImportedInputIds = {},
70                             std::vector<ImportedOutputId> preImportedOutputIds = {});
71  
72      /// Thread safe execution of the loaded network
73      Status Execute(const InputTensors& inputTensors,
74                     const OutputTensors& outputTensors,
75                     IWorkingMemHandle& workingMemHandle,
76                     std::vector<ImportedInputId> preImportedInputs = {},
77                     std::vector<ImportedOutputId> preImportedOutputs = {});
78  
79      static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
80                                                              std::string& errorMessage,
81                                                              const INetworkProperties& networkProperties,
82                                                              arm::pipe::IProfilingService* profilingService);
83  
84      // NOTE we return by reference as the purpose of this method is only to provide
85      // access to the private m_Profiler and in theory we should not need to increment
86      // the shared_ptr's reference counter
GetProfiler() const87      const std::shared_ptr<IProfiler>& GetProfiler() const { return m_OptimizedNetwork->GetProfiler(); }
88  
89      void FreeWorkingMemory();
90  
91      void RegisterDebugCallback(const DebugCallbackFunction& func);
92  
93      void SendNetworkStructure(arm::pipe::IProfilingService& profilingService);
94  
IsAsyncEnabled()95      bool IsAsyncEnabled()
96      {
97          return m_NetworkProperties.m_AsyncEnabled;
98      }
99  
100      arm::pipe::ProfilingGuid GetNetworkGuid();
101  
102  private:
103  
104  
105      void AllocateWorkingMemory(
106  #if !defined(ARMNN_DISABLE_THREADS)
107          std::lock_guard<std::mutex>& lock
108  #endif
109      );
110      void AllocateAndExecuteConstantWorkloads();
111      void AllocateAndExecuteConstantWorkloadsAsync();
112  
113      std::unordered_map<LayerGuid, std::unique_ptr<IWorkload>> m_ConstantWorkloads;
114      std::unordered_map<LayerGuid, ITensorHandle*> m_ConstantTensorHandles;
115  
116      std::unique_ptr<IMemoryOptimizerStrategy> m_ConstantStrategy = std::make_unique<SingleAxisPriorityList>();
117  
118      LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
119                    const INetworkProperties& networkProperties,
120                    arm::pipe::IProfilingService* profilingService);
121  
122      void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
123  
124      void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
125  
126      void EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle);
127  
128      void ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle);
129  
130      bool Execute(std::unique_ptr<arm::pipe::TimelineUtilityMethods>& timelineUtils,
131                   arm::pipe::ProfilingGuid inferenceGuid);
132  
133      const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;
134  
135      inline LayerBindingId ValidateImportedInputID(ImportedInputId id);
136      inline LayerBindingId ValidateImportedOutputID(ImportedOutputId id);
137  
138      void CreateMemoryProfile();
139      void CreateMemoryProfileAsync();
140  
141      std::unique_ptr<MemoryManager> CreateExternalMemoryManger(
142              std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemory);
143  
144      using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;
145  
146      BackendPtrMap  m_Backends;
147      std::vector<IBackendInternal::IMemoryManagerSharedPtr> m_BackendMemoryMangers;
148  
149      using WorkloadFactoryMap = std::unordered_map<BackendId, IBackendInternal::IWorkloadFactoryPtr>;
150      WorkloadFactoryMap  m_WorkloadFactories;
151  
152      std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;
153  
154      WorkloadQueue                      m_InputQueue;
155      WorkloadQueue                      m_WorkloadQueue;
156      WorkloadQueue                      m_OutputQueue;
157  
158  #if !defined(ARMNN_DISABLE_THREADS)
159      mutable std::mutex m_WorkingMemMutex;
160  #endif
161  
162      bool m_IsWorkingMemAllocated = false;
163  
164      INetworkProperties m_NetworkProperties;
165  
166      TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
167  
168      // NOTE: raw pointer because the profiling service is controlled by the Runtime
169      arm::pipe::IProfilingService* m_ProfilingService;
170  
171      struct ImportedTensorHandlePin
172      {
ImportedTensorHandlePinarmnn::LoadedNetwork::ImportedTensorHandlePin173          ImportedTensorHandlePin()
174          {}
175  
ImportedTensorHandlePinarmnn::LoadedNetwork::ImportedTensorHandlePin176          ImportedTensorHandlePin(LayerBindingId layerBindingId,
177                                  std::unique_ptr<ITensorHandle> tensorHandle)
178          : m_LayerBindingId(layerBindingId)
179          , m_TensorHandle(std::move(tensorHandle))
180          {}
181  
182          ImportedTensorHandlePin(ImportedTensorHandlePin&&) = default;
183  
~ImportedTensorHandlePinarmnn::LoadedNetwork::ImportedTensorHandlePin184          ~ImportedTensorHandlePin()
185          {
186              if (m_TensorHandle)
187              {
188                  m_TensorHandle->Unimport();
189              }
190          }
191  
192          LayerBindingId m_LayerBindingId;
193          std::unique_ptr<ITensorHandle> m_TensorHandle;
194      };
195  
196      std::vector<ImportedTensorHandlePin> m_PreImportedInputHandles;
197      std::vector<ImportedTensorHandlePin> m_PreImportedOutputHandles;
198  
199      ImportedInputId m_CurImportedInputId = 0;
200      ImportedInputId m_CurImportedOutputId = 0;
201  
202      std::unordered_map<BackendId, std::vector<MemBlock>> m_MemBlockMap;
203      std::unordered_map<BackendId, std::vector<MemBin>> m_MemBinMap;
204  
205      std::vector<ITensorHandle*> m_Tensorhandles;
206  
207      std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> m_TensorMemory;
208  
209      std::unique_ptr<MemoryManager> m_ExternalMemoryManager;
210  
211      std::unordered_map<BackendId, bool> m_SupportsExternallyManagedMemory;
212  
213      // A set of vectors to record the workload queue indexes and their corresponding Input/Output Slot indexes
214      // which are connected to Inputs and Outputs for the network.
215      struct WorkloadIndices
216      {
217          unsigned int m_WorkloadIndex;
218          unsigned int m_SlotIndex;
219      };
220  
221      struct OutputWorkloadIndices
222      {
223          WorkloadIndices m_OutputSlotIndices;
224          std::vector<WorkloadIndices> m_InputSlotIndices;
225      };
226      std::unordered_map<LayerBindingId, std::vector<WorkloadIndices>> m_InputWorkloadSlotPairs;
227      std::unordered_map<LayerBindingId, OutputWorkloadIndices> m_OutputWorkloadSlotPairs;
228      std::vector<bool> m_IsInputImported;
229      std::vector<bool> m_IsOutputImported;
230  
231  };
232  
233  }
234