//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LayersFwd.hpp"
#include <Network.hpp>
#include <TestUtils.hpp>
#include <doctest/doctest.h>
#include <armnn/backends/TensorHandle.hpp>
#include <Optimizer.hpp>

TEST_SUITE("Optimizer")
{
using namespace armnn;
using namespace armnn::optimizations;

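// Each test below hand-builds a Graph, runs a single optimization pass
// (FoldPadIntoConvolution2d, FoldPadIntoDepthwiseConvolution2d or FoldPadIntoPooling2d)
// and then checks the resulting layer sequence. A successful fold removes the Pad layer,
// moves its padding into the following layer's own pad fields and renames that layer
// "folded-pad-into-<layer>"; the negative tests cover the cases where folding is unsafe.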
TEST_CASE("FoldPadLayerIntoConvolution2dLayer")
{
    Graph              graph;
    const unsigned int inputShape[]   = {1, 2, 2, 3};
    const unsigned int paddedShape[]  = {1, 6, 6, 3};
    const unsigned int weightsShape[] = {1, 2, 3, 3};
    const unsigned int outputShape[]  = {1, 2, 1, 1};

    TensorInfo inputInfo(4, inputShape, DataType::Float32);
    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
    TensorInfo weightsInfo(4, weightsShape, DataType::Float32, 1.0f, 0, true);
    TensorInfo outputInfo(4, outputShape, DataType::Float32);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

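    // PadDescriptor takes a {before, after} padding pair per dimension. With NHWC data
    // this pads height and width by 2 on each side, growing {1, 2, 2, 3} to {1, 6, 6, 3}.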
    PadDescriptor padDescriptor({{0, 0},
                                 {2, 2},
                                 {2, 2},
                                 {0, 0}});

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    Convolution2dDescriptor convolution2dDescriptor;
    convolution2dDescriptor.m_BiasEnabled = false;
    convolution2dDescriptor.m_StrideX     = 1;
    convolution2dDescriptor.m_StrideY     = 1;
    convolution2dDescriptor.m_DataLayout  = DataLayout::NHWC;

    std::vector<float> weightsVector(18);
    ConstTensor        weights(weightsInfo, weightsVector);

    ConstantLayer* weightsLayer = graph.AddLayer<ConstantLayer>("Weights");
    weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);

    Convolution2dLayer* conv2dLayer = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "conv2d");
    conv2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> conv2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(conv2dLayer->GetInputSlot(0));
    weightsLayer->GetOutputSlot().Connect(conv2dLayer->GetInputSlot(1));
    conv2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    auto checkSimpleConv2d = [](const Layer* const layer)->bool {
        const auto conv2dLayer       = static_cast<const Convolution2dLayer*>(layer);
        const auto conv2dLayerParams = conv2dLayer->GetParameters();
        return IsLayerOfType<Convolution2dLayer>(layer) && (layer->GetNameStr() == "conv2d") &&
            (conv2dLayerParams.m_PadLeft == 0) && (conv2dLayerParams.m_PadRight == 0) &&
            (conv2dLayerParams.m_PadTop == 0) && (conv2dLayerParams.m_PadBottom == 0) &&
            (conv2dLayerParams.m_StrideX == 1) && (conv2dLayerParams.m_StrideY == 1) &&
            (conv2dLayerParams.m_BiasEnabled == false) && (conv2dLayerParams.m_DataLayout == DataLayout::NHWC);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>,
                                                      &IsLayerOfType<ConstantLayer>,
                                                      &IsLayerOfType<PadLayer>,
                                                      checkSimpleConv2d,
                                                      &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FoldPadIntoConvolution2d()));

    auto checkPadFoldedIntoConv2d = [](const Layer* const layer)->bool {
        const auto conv2dLayer       = static_cast<const Convolution2dLayer*>(layer);
        const auto conv2dLayerParams = conv2dLayer->GetParameters();
        return IsLayerOfType<Convolution2dLayer>(layer) && (layer->GetNameStr() == "folded-pad-into-conv2d") &&
            (conv2dLayerParams.m_PadLeft == 2) && (conv2dLayerParams.m_PadRight == 2) &&
            (conv2dLayerParams.m_PadTop == 2) && (conv2dLayerParams.m_PadBottom == 2) &&
            (conv2dLayerParams.m_StrideX == 1) && (conv2dLayerParams.m_StrideY == 1) &&
            (conv2dLayerParams.m_BiasEnabled == false) && (conv2dLayerParams.m_DataLayout == DataLayout::NHWC);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>,
                                                      &IsLayerOfType<ConstantLayer>,
                                                      checkPadFoldedIntoConv2d,
                                                      &IsLayerOfType<OutputLayer>));
}

TEST_CASE("FoldPadLayerIntoDepthwiseConvolution2dLayer")
{
    Graph              graph;
    const unsigned int inputShape[]   = {1, 2, 2, 3};
    const unsigned int paddedShape[]  = {1, 6, 6, 3};
    const unsigned int weightsShape[] = {1, 2, 3, 3};
    const unsigned int outputShape[]  = {1, 2, 1, 3};

    TensorInfo inputInfo(4, inputShape, DataType::Float32);
    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
    TensorInfo weightsInfo(4, weightsShape, DataType::Float32, 1.0f, 0, true);
    TensorInfo outputInfo(4, outputShape, DataType::Float32);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

    PadDescriptor padDescriptor({{0, 0},
                                 {2, 2},
                                 {2, 2},
                                 {0, 0}});

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor;
    depthwiseConvolution2dDescriptor.m_BiasEnabled = false;
    depthwiseConvolution2dDescriptor.m_StrideX     = 1;
    depthwiseConvolution2dDescriptor.m_StrideY     = 1;
    depthwiseConvolution2dDescriptor.m_DataLayout  = DataLayout::NHWC;

    std::vector<float> weightsVector(18);
    ConstTensor        weights(weightsInfo, weightsVector);

    auto* weightsLayer = graph.AddLayer<ConstantLayer>("weights");
    weightsLayer->GetOutputSlot().SetTensorInfo(weightsInfo);
    weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);

    auto* depthwiseConv2dLayer = graph.AddLayer<DepthwiseConvolution2dLayer>(depthwiseConvolution2dDescriptor,
                                                                             "depthwiseConv2d");
    depthwiseConv2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> depthwiseConv2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(depthwiseConv2dLayer->GetInputSlot(0));
    weightsLayer->GetOutputSlot().Connect(depthwiseConv2dLayer->GetInputSlot(1));
    depthwiseConv2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    auto checkSimpleDepthwiseConv2d = [](const Layer* const layer)->bool {
        const auto depthwiseConv2dLayer       = static_cast<const DepthwiseConvolution2dLayer*>(layer);
        const auto depthwiseConv2dLayerParams = depthwiseConv2dLayer->GetParameters();
        return IsLayerOfType<DepthwiseConvolution2dLayer>(layer) && (layer->GetNameStr() == "depthwiseConv2d") &&
            (depthwiseConv2dLayerParams.m_PadLeft == 0) && (depthwiseConv2dLayerParams.m_PadRight == 0) &&
            (depthwiseConv2dLayerParams.m_PadTop == 0) && (depthwiseConv2dLayerParams.m_PadBottom == 0) &&
            (depthwiseConv2dLayerParams.m_StrideX == 1) && (depthwiseConv2dLayerParams.m_StrideY == 1) &&
            (depthwiseConv2dLayerParams.m_BiasEnabled == false) &&
            (depthwiseConv2dLayerParams.m_DataLayout == DataLayout::NHWC);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>,
                                                      &IsLayerOfType<ConstantLayer>,
                                                      &IsLayerOfType<PadLayer>,
                                                      checkSimpleDepthwiseConv2d,
                                                      &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoDepthwiseConvolution2d()));

    auto checkPadFoldedIntoDepthwiseConv2d = [](const Layer* const layer)->bool {
        const auto depthwiseConv2dLayer       = static_cast<const DepthwiseConvolution2dLayer*>(layer);
        const auto depthwiseConv2dLayerParams = depthwiseConv2dLayer->GetParameters();
        return IsLayerOfType<DepthwiseConvolution2dLayer>(layer) &&
            (layer->GetNameStr() == "folded-pad-into-depthwiseConv2d") &&
            (depthwiseConv2dLayerParams.m_PadLeft == 2) && (depthwiseConv2dLayerParams.m_PadRight == 2) &&
            (depthwiseConv2dLayerParams.m_PadTop == 2) && (depthwiseConv2dLayerParams.m_PadBottom == 2) &&
            (depthwiseConv2dLayerParams.m_StrideX == 1) && (depthwiseConv2dLayerParams.m_StrideY == 1) &&
            (depthwiseConv2dLayerParams.m_BiasEnabled == false) &&
            (depthwiseConv2dLayerParams.m_DataLayout == DataLayout::NHWC);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>,
                                                      &IsLayerOfType<ConstantLayer>,
                                                      checkPadFoldedIntoDepthwiseConv2d,
                                                      &IsLayerOfType<OutputLayer>));
}

TEST_CASE("FoldPadLayerIntoPooling2dLayer")
{
    Graph              graph;
    const unsigned int inputShape[]  = {1, 2, 2, 3};
    const unsigned int paddedShape[] = {1, 4, 4, 3};
    const unsigned int outputShape[] = {1, 2, 2, 3};

    TensorInfo inputInfo(4, inputShape, DataType::Float32);
    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
    TensorInfo outputInfo(4, outputShape, DataType::Float32);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

    PadDescriptor padDescriptor({{0, 0},
                                 {1, 1},
                                 {1, 1},
                                 {0, 0}});

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    Pooling2dDescriptor pooling2dDescriptor;
    pooling2dDescriptor.m_PoolType   = PoolingAlgorithm::Average;
    pooling2dDescriptor.m_PoolWidth  = 3;
    pooling2dDescriptor.m_PoolHeight = 3;
    pooling2dDescriptor.m_StrideX    = 1;
    pooling2dDescriptor.m_StrideY    = 1;
    pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;

    Pooling2dLayer* pool2dLayer = graph.AddLayer<Pooling2dLayer>(pooling2dDescriptor, "pool2d");
    pool2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> pool2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(pool2dLayer->GetInputSlot(0));
    pool2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    auto checkSimplePool2d = [&](const Layer* const layer) {
        const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
        return IsLayerOfType<Pooling2dLayer>(layer) && (layer->GetNameStr() == "pool2d") &&
            (pool2dLayer->GetParameters() == pooling2dDescriptor);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoPooling2d()));

    auto checkPadFoldedIntoPool2d = [&](const Layer* const layer) {
        if (!IsLayerOfType<Pooling2dLayer>(layer) || (layer->GetNameStr() != "folded-pad-into-pool2d"))
        {
            return false;
        }

        const auto                pool2dLayer       = static_cast<const Pooling2dLayer*>(layer);
        const Pooling2dDescriptor pool2dLayerParams = pool2dLayer->GetParameters();

        Pooling2dDescriptor pool2dLayerParamsNoPad = pool2dLayerParams;
        pool2dLayerParamsNoPad.m_PadLeft       = 0;
        pool2dLayerParamsNoPad.m_PadRight      = 0;
        pool2dLayerParamsNoPad.m_PadTop        = 0;
        pool2dLayerParamsNoPad.m_PadBottom     = 0;
        // If we fold then PaddingMethod will be set to IgnoreValue. The original will be Exclude.
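        // (IgnoreValue keeps the padded elements inside the pooling window, which matches
        // the arithmetic of averaging over the explicitly zero-padded tensor.)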
        pool2dLayerParamsNoPad.m_PaddingMethod = PaddingMethod::Exclude;

        return (pool2dLayerParamsNoPad == pooling2dDescriptor) && (pool2dLayerParams.m_PadLeft == 1) &&
            (pool2dLayerParams.m_PadRight == 1) && (pool2dLayerParams.m_PadTop == 1) &&
            (pool2dLayerParams.m_PadBottom == 1) && (pool2dLayerParams.m_PaddingMethod == PaddingMethod::IgnoreValue);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             checkPadFoldedIntoPool2d,
                             &IsLayerOfType<OutputLayer>));
}

TEST_CASE("FoldPadLayerIntoPooling2d_PadWithMultipleOutputsShouldNotBeOptimized")
{
    // In this test case we'll set up a pad layer with two outputs: one goes to a pooling
    // layer and the other goes to an output layer. FoldPadLayerIntoPooling2d should not
    // optimize this graph as it uses the OptimizeForExclusiveConnection method.
    Graph              graph;
    const unsigned int inputShape[]  = {1, 2, 2, 3};
    const unsigned int paddedShape[] = {1, 4, 4, 3};
    const unsigned int outputShape[] = {1, 2, 2, 3};

    TensorInfo inputInfo(4, inputShape, DataType::Float32);
    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
    TensorInfo outputInfo(4, outputShape, DataType::Float32);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

    PadDescriptor padDescriptor({{0, 0},
                                 {1, 1},
                                 {1, 1},
                                 {0, 0}});

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    Pooling2dDescriptor pooling2dDescriptor;
    pooling2dDescriptor.m_PoolType   = PoolingAlgorithm::Average;
    pooling2dDescriptor.m_PoolWidth  = 3;
    pooling2dDescriptor.m_PoolHeight = 3;
    pooling2dDescriptor.m_StrideX    = 1;
    pooling2dDescriptor.m_StrideY    = 1;
    pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;

    Pooling2dLayer* pool2dLayer = graph.AddLayer<Pooling2dLayer>(pooling2dDescriptor, "pool2d");
    pool2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> pool2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(pool2dLayer->GetInputSlot(0));
    pool2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    // Add the alternative branch from the pad layer to an output layer.
    Layer* secondOutput = graph.AddLayer<OutputLayer>(1, "dummy output");
    padLayer->GetOutputSlot().Connect(secondOutput->GetInputSlot(0));

    auto checkSimplePool2d = [&](const Layer* const layer) {
        const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
        return IsLayerOfType<Pooling2dLayer>(layer) && (layer->GetNameStr() == "pool2d") &&
            (pool2dLayer->GetParameters() == pooling2dDescriptor);
    };

    // Initial sequence.
    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>,
                             &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoPooling2d()));

    // The network should not change.
    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>,
                             &IsLayerOfType<OutputLayer>));
}

TEST_CASE("FoldPadLayerIntoPooling2dLayer_PoolingLayerWithExcludePaddingShouldNotTakeMorePadding")
{
    // In this test we set up an input, a Pad layer, a Pooling layer that includes padding, and an
    // output layer. The optimization should not work as the pooling layer already includes an
    // existing pad and specifies PaddingMethod::Exclude.
    Graph              graph;
    const unsigned int inputShape[]  = {1, 2, 2, 3};
    const unsigned int paddedShape[] = {1, 4, 4, 3};
    const unsigned int outputShape[] = {1, 2, 2, 3};

    TensorInfo inputInfo(4, inputShape, DataType::Float32);
    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
    TensorInfo outputInfo(4, outputShape, DataType::Float32);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

    PadDescriptor padDescriptor({{0, 0},
                                 {1, 1},
                                 {1, 1},
                                 {0, 0}});

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    Pooling2dDescriptor pooling2dDescriptor;
    pooling2dDescriptor.m_PoolType      = PoolingAlgorithm::Average;
    pooling2dDescriptor.m_PoolWidth     = 3;
    pooling2dDescriptor.m_PoolHeight    = 3;
    pooling2dDescriptor.m_StrideX       = 1;
    pooling2dDescriptor.m_StrideY       = 1;
    pooling2dDescriptor.m_DataLayout    = DataLayout::NHWC;
    // Include a pad with the pooling layer. This should prevent the optimization working.
    pooling2dDescriptor.m_PadLeft       = 1;
    pooling2dDescriptor.m_PadRight      = 1;
    pooling2dDescriptor.m_PadTop        = 1;
    pooling2dDescriptor.m_PadBottom     = 1;
    pooling2dDescriptor.m_PaddingMethod = PaddingMethod::Exclude;

    Pooling2dLayer* pool2dLayer = graph.AddLayer<Pooling2dLayer>(pooling2dDescriptor, "pool2d");
    pool2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> pool2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(pool2dLayer->GetInputSlot(0));
    pool2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    auto checkSimplePool2d = [&](const Layer* const layer) {
        const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
        return IsLayerOfType<Pooling2dLayer>(layer) && (layer->GetNameStr() == "pool2d") &&
            (pool2dLayer->GetParameters() == pooling2dDescriptor);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoPooling2d()));

    // The optimization should not have modified the graph.
    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>));
}

TEST_CASE("FoldPadLayerIntoPooling2dLayer_MaxPoolingLayerWithLargePadValueShouldNotBeFolded")
{
    // In this test we set up an input, a Pad layer with a large pad value, a Max Pooling layer,
    // and an output layer. The optimization should not work as the pad value would modify the
    // result of the max pooling layer.
    Graph              graph;
    const unsigned int inputShape[]  = {1, 2, 2, 3};
    const unsigned int paddedShape[] = {1, 4, 4, 3};
    const unsigned int outputShape[] = {1, 2, 2, 3};

    TensorInfo inputInfo(4, inputShape, DataType::Float32);
    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
    TensorInfo outputInfo(4, outputShape, DataType::Float32);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

    PadDescriptor padDescriptor({{0, 0},
                                 {1, 1},
                                 {1, 1},
                                 {0, 0}});
    // For max pooling of floats, a pad value of 0 is more than enough to stop the fold happening.
    // Set this to -std::numeric_limits<float>::infinity() to make the fold happen.
    padDescriptor.m_PadValue = 0;

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    Pooling2dDescriptor pooling2dDescriptor;
    pooling2dDescriptor.m_PoolType   = PoolingAlgorithm::Max;
    pooling2dDescriptor.m_PoolWidth  = 3;
    pooling2dDescriptor.m_PoolHeight = 3;
    pooling2dDescriptor.m_StrideX    = 1;
    pooling2dDescriptor.m_StrideY    = 1;
    pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;

    Pooling2dLayer* pool2dLayer = graph.AddLayer<Pooling2dLayer>(pooling2dDescriptor, "pool2d");
    pool2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> pool2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(pool2dLayer->GetInputSlot(0));
    pool2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    auto checkSimplePool2d = [&](const Layer* const layer) {
        const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
        return IsLayerOfType<Pooling2dLayer>(layer) && (layer->GetNameStr() == "pool2d") &&
            (pool2dLayer->GetParameters() == pooling2dDescriptor);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoPooling2d()));

    // The optimization should not have modified the graph.
    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<PadLayer>,
                             checkSimplePool2d,
                             &IsLayerOfType<OutputLayer>));
}

TEST_CASE("FoldPadLayerIntoPooling2dLayer_QuantizedAveragePoolingShouldNotBeFolded")
{
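    // The fold is expected to be rejected for quantized (QAsymmU8) average pooling,
    // presumably because a float pad value does not in general correspond to the
    // tensor's quantized zero point, so folding could change the averaged result.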
    Graph              graph;
    const unsigned int inputShape[]  = {1, 2, 2, 3};
    const unsigned int paddedShape[] = {1, 4, 4, 3};
    const unsigned int outputShape[] = {1, 2, 2, 3};

    TensorInfo inputInfo(4, inputShape, DataType::QAsymmU8);
    TensorInfo paddedInfo(4, paddedShape, DataType::QAsymmU8);
    TensorInfo outputInfo(4, outputShape, DataType::QAsymmU8);

    Layer* input = graph.AddLayer<InputLayer>(0, "input");
    input->GetOutputSlot().SetTensorInfo(inputInfo);

    PadDescriptor padDescriptor({{0, 0},
                                 {1, 1},
                                 {1, 1},
                                 {0, 0}});

    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);

    Pooling2dDescriptor pooling2dDescriptor;
    pooling2dDescriptor.m_PoolType   = PoolingAlgorithm::Average;
    pooling2dDescriptor.m_PoolWidth  = 3;
    pooling2dDescriptor.m_PoolHeight = 3;
    pooling2dDescriptor.m_StrideX    = 1;
    pooling2dDescriptor.m_StrideY    = 1;
    pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;

    Pooling2dLayer* pool2dLayer = graph.AddLayer<Pooling2dLayer>(pooling2dDescriptor, "pool2d");
    pool2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);

    Layer* output = graph.AddLayer<OutputLayer>(0, "output");

    // Connect up layers - input -> pad -> pool2d -> output
    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
    padLayer->GetOutputSlot().Connect(pool2dLayer->GetInputSlot(0));
    pool2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));

    auto checkSimplePool2d = [&](const Layer* const layer) {
        const auto pool2dLayer = static_cast<const Pooling2dLayer*>(layer);
        return IsLayerOfType<Pooling2dLayer>(layer) && (layer->GetNameStr() == "pool2d") &&
            (pool2dLayer->GetParameters() == pooling2dDescriptor);
    };

    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                        &IsLayerOfType<InputLayer>,
                        &IsLayerOfType<PadLayer>,
                        checkSimplePool2d,
                        &IsLayerOfType<OutputLayer>));

    armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoPooling2d()));

    // The optimization should not have modified the graph.
    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                        &IsLayerOfType<InputLayer>,
                        &IsLayerOfType<PadLayer>,
                        checkSimplePool2d,
                        &IsLayerOfType<OutputLayer>));
}

#if defined(ARMNNREF_ENABLED)
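// The following end-to-end tests run inference on the reference backend (Compute::CpuRef),
// so they are only compiled when the reference backend is available.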
TEST_CASE("FoldPadLayerIntoPooling2dLayer_ExecuteInferenceWithAndWithoutOptimization")
{
    // The idea of this test is to run a simple pad+pool2d network twice: once
    // with FoldPadLayerIntoPooling2dLayer enabled and a second time with it
    // avoided. The output tensors of each run should match.
    const unsigned int inputShape[]  = {1, 4, 4, 2};
    const unsigned int paddedShape[] = {1, 6, 6, 2};
    const unsigned int outputShape[] = {1, 4, 4, 2};
    std::vector<float> inputData({2.0f, 2.0f, 6.0f, 6.0f,
                                  4.0f, 4.0f, 8.0f, 8.0f,
                                  10.0f, 12.0f, 14.0f, 16.0f,
                                  10.0f, 12.0f, 16.0f, 14.0f,

                                  18.0f, 20.0f, 24.0f, 22.0f,
                                  20.0f, 18.0f, 22.0f, 24.0f,
                                  26.0f, 28.0f, 0.0f, 0.0f,
                                  26.0f, 28.0f, 0.0f, 0.0f,
                                 });
    try
    {
        // Create a network of input, pad, pooling 2D, output.
        INetworkPtr network = INetwork::Create();

        IConnectableLayer* inputLayer = network->AddInputLayer(0);
        TensorInfo inputInfo(4, inputShape, DataType::Float32);
        inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

        PadDescriptor padDescriptor({{0, 0},
                                     {1, 1},
                                     {1, 1},
                                     {0, 0}});
        IConnectableLayer* padLayer = network->AddPadLayer(padDescriptor, "Pad");
        TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
        padLayer->GetOutputSlot(0).SetTensorInfo(paddedInfo);

        Pooling2dDescriptor pooling2dDescriptor;
        pooling2dDescriptor.m_PoolType   = PoolingAlgorithm::Average;
        pooling2dDescriptor.m_PoolWidth  = 3;
        pooling2dDescriptor.m_PoolHeight = 3;
        pooling2dDescriptor.m_StrideX    = 1;
        pooling2dDescriptor.m_StrideY    = 1;
        pooling2dDescriptor.m_DataLayout = DataLayout::NHWC;
        IConnectableLayer* pool2dLayer = network->AddPooling2dLayer(pooling2dDescriptor, "Pool2D");
        TensorInfo outputInfo(4, outputShape, DataType::Float32);
        pool2dLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

        IConnectableLayer* outputLayer = network->AddOutputLayer(0);

        // Connect layers
        inputLayer->GetOutputSlot(0).Connect(padLayer->GetInputSlot(0));
        padLayer->GetOutputSlot(0).Connect(pool2dLayer->GetInputSlot(0));
        pool2dLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

        // Create ArmNN runtime
        IRuntimePtr          run              = IRuntime::Create(IRuntime::CreationOptions());    // default options
        // Optimise the network
        IOptimizedNetworkPtr optimizedNetwork = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
        // Load network into runtime
        NetworkId            networkIdentifier;
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optimizedNetwork)) == Status::Success);

        TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
        inputTensorInfo.SetConstant(true);
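        // ConstTensor requires its TensorInfo to be flagged constant, hence SetConstant(true) above.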
        InputTensors inputTensors{{0, ConstTensor(inputTensorInfo, inputData.data())}};

        // Set the initial output values to differ from the golden data in case the inference fails.
        std::vector<float> optimizedData(32, -std::numeric_limits<float>::infinity());
        OutputTensors      outputTensors{{0, Tensor(outputInfo, optimizedData.data())}};
        // Execute network
        run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
        // Unload it.
        run->UnloadNetwork(networkIdentifier);

        // In this second case the pad will have two outputs: one connected to the pooling layer, the
        // second connected to a second output layer. This will prevent the FoldPadLayerIntoPooling2dLayer
        // optimization from working. A previous test, FoldPadLayerIntoPooling2d_PadWithMultipleOutputsShouldNotBeOptimized,
        // has proved that doing this will avoid the optimization.
        IConnectableLayer* dummyOutputLayer = network->AddOutputLayer(1);
        padLayer->GetOutputSlot(0).Connect(dummyOutputLayer->GetInputSlot(0));

        // Optimize and load and execute it a second time.
        optimizedNetwork = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optimizedNetwork)) == Status::Success);
        std::vector<float> goldenData(32, 0.0f);
        std::vector<float> padOutputData(72, 0.0f);
        OutputTensors      goldenTensors{{0, Tensor(outputInfo, goldenData.data())},
                                         {1, Tensor(paddedInfo, padOutputData.data())}};
        run->EnqueueWorkload(networkIdentifier, inputTensors, goldenTensors);

        // Now we can compare goldenData against optimizedData. They should be the same.
        CHECK(std::equal(goldenData.begin(), goldenData.end(), optimizedData.begin()));
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        ARMNN_ASSERT_MSG(false, e.what());
    }
}

TEST_CASE("FoldPadLayerIntoConv2dLayer_ExecuteInferenceWithAndWithoutOptimization")
{
    // The idea of this test is to run a simple pad+conv2d network twice: once
    // with FoldPadLayerIntoConv2dLayer enabled and a second time with it
    // avoided. The output tensors of each run should match.
    const unsigned int inputShape[]   = {1, 4, 4, 3}; // NHWCin
    const unsigned int paddedShape[]  = {1, 6, 6, 3};
    const unsigned int weightsShape[] = {4, 2, 2, 3}; // CoutHWCin
    const unsigned int outputShape[]  = {1, 5, 5, 4}; // NHWCout

    std::vector<float> inputData({2.0f, 2.0f, 6.0f, 6.0f,
                                  4.0f, 4.0f, 8.0f, 8.0f,
                                  10.0f, 12.0f, 14.0f, 16.0f,
                                  10.0f, 12.0f, 16.0f, 14.0f,

                                  18.0f, 20.0f, 24.0f, 22.0f,
                                  20.0f, 18.0f, 22.0f, 24.0f,
                                  26.0f, 28.0f, 0.0f, 0.0f,
                                  26.0f, 28.0f, 0.0f, 0.0f,

                                  2.0f, 2.0f, 6.0f, 6.0f,
                                  4.0f, 4.0f, 8.0f, 8.0f,
                                  10.0f, 12.0f, 14.0f, 16.0f,
                                  10.0f, 12.0f, 16.0f, 14.0f,
                                 });
    try
    {
        // Create a network of input, pad, convolution 2D, output.
        INetworkPtr network = INetwork::Create();

        IConnectableLayer* inputLayer = network->AddInputLayer(0);
        TensorInfo inputInfo(4, inputShape, DataType::Float32);
        inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

        PadDescriptor padDescriptor({{0, 0},
                                     {1, 1},
                                     {1, 1},
                                     {0, 0}});
        IConnectableLayer* padLayer = network->AddPadLayer(padDescriptor, "Pad");
        TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
        padLayer->GetOutputSlot(0).SetTensorInfo(paddedInfo);

        Convolution2dDescriptor convDescriptor;
        convDescriptor.m_DataLayout  = DataLayout::NHWC;
        convDescriptor.m_StrideX     = 1;
        convDescriptor.m_StrideY     = 1;
        convDescriptor.m_BiasEnabled = true;

        std::vector<float>    weightsData  = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                              11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                              21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
        TensorInfo            weightsInfo(4, weightsShape, DataType::Float32, 1.0f, 0, true);
        ConstTensor           weights(weightsInfo, weightsData);
        std::vector<float>    biasVector   = {5, 6, 7, 8};
        TensorInfo            biasInfo({4}, DataType::Float32, 1.0f, 0, true);
        ConstTensor           bias(biasInfo, biasVector);

        IConnectableLayer* conv2dLayer = network->AddConvolution2dLayer(convDescriptor, "Conv2D");

        TensorInfo outputInfo(4, outputShape, DataType::Float32);
        conv2dLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

        IConnectableLayer* outputLayer = network->AddOutputLayer(0);

        // Connect layers
        inputLayer->GetOutputSlot(0).Connect(padLayer->GetInputSlot(0));
        padLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(0));
        conv2dLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

        auto weightsLayer = network->AddConstantLayer(weights, "Weights");
        weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
        weightsLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(1));

        auto biasLayer = network->AddConstantLayer(bias, "Bias");
        biasLayer->GetOutputSlot(0).SetTensorInfo(bias.GetInfo());
        biasLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(2));

        // Create ArmNN runtime
        IRuntimePtr          run              = IRuntime::Create(IRuntime::CreationOptions());    // default options
        // Optimise the network
        IOptimizedNetworkPtr optimizedNetwork = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
        // Load network into runtime
        NetworkId            networkIdentifier;
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optimizedNetwork)) == Status::Success);

        TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
        inputTensorInfo.SetConstant(true);
        InputTensors inputTensors{{0, ConstTensor(inputTensorInfo, inputData.data())}};

        // Set the initial output values to differ from the golden data in case the inference fails.
        std::vector<float> optimizedData(100, -std::numeric_limits<float>::infinity());
        OutputTensors      outputTensors{{0, Tensor(outputInfo, optimizedData.data())}};
        // Execute network
        run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
        // Unload it.
        run->UnloadNetwork(networkIdentifier);

        // In this second case the pad will have two outputs: one connected to the conv layer, the
        // second connected to a second output layer. This will prevent the FoldPadLayerIntoConv2dLayer
        // optimization from working. A previous test, FoldPadLayerIntoConv2d_PadWithMultipleOutputsShouldNotBeOptimized,
        // has proved that doing this will avoid the optimization.
        IConnectableLayer* dummyOutputLayer = network->AddOutputLayer(1);
        padLayer->GetOutputSlot(0).Connect(dummyOutputLayer->GetInputSlot(0));

        // Optimize and load and execute it a second time.
        optimizedNetwork = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optimizedNetwork)) == Status::Success);
        std::vector<float> goldenData(100, 0.0f);
        std::vector<float> padOutputData(108, 0.0f);
        OutputTensors      goldenTensors{{0, Tensor(outputInfo, goldenData.data())},
                                         {1, Tensor(paddedInfo, padOutputData.data())}};
        run->EnqueueWorkload(networkIdentifier, inputTensors, goldenTensors);

        // Now we can compare goldenData against optimizedData. They should be the same.
        CHECK(std::equal(goldenData.begin(), goldenData.end(), optimizedData.begin()));
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        ARMNN_ASSERT_MSG(false, e.what());
    }
}

TEST_CASE("FoldPadLayerIntoDepthwiseConv2dLayer_ExecuteInferenceWithAndWithoutOptimization")
{
    // The idea of this test is to run a simple pad+depthwiseconv2d network twice: once
    // with FoldPadLayerIntoDepthwiseConv2dLayer enabled and a second time with it
    // avoided. The output tensors of each run should match.
    const unsigned int inputShape[]   = {1, 4, 4, 3}; // NHWCin
    const unsigned int paddedShape[]  = {1, 6, 6, 3};
    const unsigned int weightsShape[] = {1, 2, 2, 12}; // 1HWCout
    const unsigned int outputShape[]  = {1, 5, 5, 12}; // NHWCout

    std::vector<float> inputData({2.0f, 2.0f, 6.0f, 6.0f,
                                  4.0f, 4.0f, 8.0f, 8.0f,
                                  10.0f, 12.0f, 14.0f, 16.0f,
                                  10.0f, 12.0f, 16.0f, 14.0f,

                                  18.0f, 20.0f, 24.0f, 22.0f,
                                  20.0f, 18.0f, 22.0f, 24.0f,
                                  26.0f, 28.0f, 0.0f, 0.0f,
                                  26.0f, 28.0f, 0.0f, 0.0f,

                                  2.0f, 2.0f, 6.0f, 6.0f,
                                  4.0f, 4.0f, 8.0f, 8.0f,
                                  10.0f, 12.0f, 14.0f, 16.0f,
                                  10.0f, 12.0f, 16.0f, 14.0f,
                                 });
    try
    {
        // Create a network of input, pad, depthwise convolution 2D, output.
        INetworkPtr network = INetwork::Create();

        IConnectableLayer* inputLayer = network->AddInputLayer(0);
        TensorInfo inputInfo(4, inputShape, DataType::Float32);
        inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

        PadDescriptor padDescriptor({{0, 0},
                                     {1, 1},
                                     {1, 1},
                                     {0, 0}});
        IConnectableLayer* padLayer = network->AddPadLayer(padDescriptor, "Pad");
        TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
        padLayer->GetOutputSlot(0).SetTensorInfo(paddedInfo);

        DepthwiseConvolution2dDescriptor convDescriptor;
        convDescriptor.m_DataLayout  = DataLayout::NHWC;
        convDescriptor.m_StrideX     = 1;
        convDescriptor.m_StrideY     = 1;
        convDescriptor.m_BiasEnabled = true;

        std::vector<float>    weightsData  = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                              11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                              21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
        TensorInfo            weightsInfo(4, weightsShape, DataType::Float32, 0.0f, 0, true);
        ConstTensor           weights(weightsInfo, weightsData);
        std::vector<float>    biasVector   = {5, 6, 7, 8, 9, 10, 11, 12, 5, 6, 7, 8};
        TensorInfo            biasInfo({12}, DataType::Float32, 0.0f, 0, true);
        ConstTensor           bias(biasInfo, biasVector);

        IConnectableLayer* conv2dLayer = network->AddDepthwiseConvolution2dLayer(convDescriptor,
                                                                                 "DepthwiseConv2D");

        TensorInfo outputInfo(4, outputShape, DataType::Float32);
        conv2dLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

        IConnectableLayer* outputLayer = network->AddOutputLayer(0);

        // Connect layers
        inputLayer->GetOutputSlot(0).Connect(padLayer->GetInputSlot(0));
        padLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(0));
        conv2dLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

        auto weightsLayer = network->AddConstantLayer(weights, "Weights");
        weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
        weightsLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(1));

        auto biasLayer = network->AddConstantLayer(bias, "Bias");
        biasLayer->GetOutputSlot(0).SetTensorInfo(bias.GetInfo());
        biasLayer->GetOutputSlot(0).Connect(conv2dLayer->GetInputSlot(2));

        // Create ArmNN runtime
        IRuntimePtr          run              = IRuntime::Create(IRuntime::CreationOptions());    // default options
        // Optimise the network
        IOptimizedNetworkPtr optimizedNetwork = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
        // Load network into runtime
        NetworkId            networkIdentifier;
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optimizedNetwork)) == Status::Success);

        TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
        inputTensorInfo.SetConstant(true);
        InputTensors inputTensors{{0, ConstTensor(inputTensorInfo, inputData.data())}};

        // Set the initial output values to differ from the golden data in case the inference fails.
        std::vector<float> optimizedData(300, -std::numeric_limits<float>::infinity());
        OutputTensors      outputTensors{{0, Tensor(outputInfo, optimizedData.data())}};
        // Execute network
        run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
        // Unload it.
        run->UnloadNetwork(networkIdentifier);

        // In this second case the pad will have two outputs: one connected to the conv layer, the
        // second connected to a second output layer. This will prevent the FoldPadLayerIntoDepthwiseConv2dLayer
        // optimization from working. A previous test, FoldPadLayerIntoDepthwiseConv2d_PadWithMultipleOutputsShouldNotBeOptimized,
        // has proved that doing this will avoid the optimization.
        IConnectableLayer* dummyOutputLayer = network->AddOutputLayer(1);
        padLayer->GetOutputSlot(0).Connect(dummyOutputLayer->GetInputSlot(0));

        // Optimize and load and execute it a second time.
        optimizedNetwork = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optimizedNetwork)) == Status::Success);
        std::vector<float> goldenData(300, 0.0f);
        std::vector<float> padOutputData(108, 0.0f);
        OutputTensors      goldenTensors{{0, Tensor(outputInfo, goldenData.data())},
                                         {1, Tensor(paddedInfo, padOutputData.data())}};
        run->EnqueueWorkload(networkIdentifier, inputTensors, goldenTensors);

        // Now we can compare goldenData against optimizedData. They should be the same.
        CHECK(std::equal(goldenData.begin(), goldenData.end(), optimizedData.begin()));
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        ARMNN_ASSERT_MSG(false, e.what());
    }
}
#endif

}