xref: /aosp_15_r20/external/ComputeLibrary/tests/framework/instruments/OpenCLTimer.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2017-2019, 2021 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #include "OpenCLTimer.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "../Framework.h"
27*c217d954SCole Faust #include "../Utils.h"
28*c217d954SCole Faust 
29*c217d954SCole Faust #include "arm_compute/graph/INode.h"
30*c217d954SCole Faust #include "arm_compute/runtime/CL/CLScheduler.h"
31*c217d954SCole Faust 
32*c217d954SCole Faust #ifndef ARM_COMPUTE_CL
33*c217d954SCole Faust #error "You can't use OpenCLTimer without OpenCL"
34*c217d954SCole Faust #endif /* ARM_COMPUTE_CL */
35*c217d954SCole Faust 
36*c217d954SCole Faust namespace arm_compute
37*c217d954SCole Faust {
38*c217d954SCole Faust namespace test
39*c217d954SCole Faust {
40*c217d954SCole Faust namespace framework
41*c217d954SCole Faust {
42*c217d954SCole Faust template <bool output_timestamps>
id() const43*c217d954SCole Faust std::string    OpenCLClock<output_timestamps>::id() const
44*c217d954SCole Faust {
45*c217d954SCole Faust     if(output_timestamps)
46*c217d954SCole Faust     {
47*c217d954SCole Faust         return "OpenCLTimestamps";
48*c217d954SCole Faust     }
49*c217d954SCole Faust     else
50*c217d954SCole Faust     {
51*c217d954SCole Faust         return "OpenCLTimer";
52*c217d954SCole Faust     }
53*c217d954SCole Faust }
54*c217d954SCole Faust 
55*c217d954SCole Faust template <bool output_timestamps>
OpenCLClock(ScaleFactor scale_factor)56*c217d954SCole Faust OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor)
57*c217d954SCole Faust     : _kernels(),
58*c217d954SCole Faust       _real_function(nullptr),
59*c217d954SCole Faust #ifdef ARM_COMPUTE_GRAPH_ENABLED
60*c217d954SCole Faust       _real_graph_function(nullptr),
61*c217d954SCole Faust #endif /* ARM_COMPUTE_GRAPH_ENABLED */
62*c217d954SCole Faust       _prefix(),
63*c217d954SCole Faust       _timer_enabled(false)
64*c217d954SCole Faust {
65*c217d954SCole Faust     auto                        q     = CLScheduler::get().queue();
66*c217d954SCole Faust     cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
67*c217d954SCole Faust     if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
68*c217d954SCole Faust     {
69*c217d954SCole Faust         CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
70*c217d954SCole Faust     }
71*c217d954SCole Faust 
72*c217d954SCole Faust     switch(scale_factor)
73*c217d954SCole Faust     {
74*c217d954SCole Faust         case ScaleFactor::NONE:
75*c217d954SCole Faust             _scale_factor = 1.f;
76*c217d954SCole Faust             _unit         = "ns";
77*c217d954SCole Faust             break;
78*c217d954SCole Faust         case ScaleFactor::TIME_US:
79*c217d954SCole Faust             _scale_factor = 1000.f;
80*c217d954SCole Faust             _unit         = "us";
81*c217d954SCole Faust             break;
82*c217d954SCole Faust         case ScaleFactor::TIME_MS:
83*c217d954SCole Faust             _scale_factor = 1000000.f;
84*c217d954SCole Faust             _unit         = "ms";
85*c217d954SCole Faust             break;
86*c217d954SCole Faust         case ScaleFactor::TIME_S:
87*c217d954SCole Faust             _scale_factor = 1000000000.f;
88*c217d954SCole Faust             _unit         = "s";
89*c217d954SCole Faust             break;
90*c217d954SCole Faust         default:
91*c217d954SCole Faust             ARM_COMPUTE_ERROR("Invalid scale");
92*c217d954SCole Faust     }
93*c217d954SCole Faust }
94*c217d954SCole Faust 
95*c217d954SCole Faust template <bool output_timestamps>
test_start()96*c217d954SCole Faust void           OpenCLClock<output_timestamps>::test_start()
97*c217d954SCole Faust {
98*c217d954SCole Faust     // Start intercepting enqueues:
99*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
100*c217d954SCole Faust     _real_function   = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
101*c217d954SCole Faust     auto interceptor = [this](
102*c217d954SCole Faust                            cl_command_queue command_queue,
103*c217d954SCole Faust                            cl_kernel        kernel,
104*c217d954SCole Faust                            cl_uint          work_dim,
105*c217d954SCole Faust                            const size_t    *gwo,
106*c217d954SCole Faust                            const size_t    *gws,
107*c217d954SCole Faust                            const size_t    *lws,
108*c217d954SCole Faust                            cl_uint          num_events_in_wait_list,
109*c217d954SCole Faust                            const cl_event * event_wait_list,
110*c217d954SCole Faust                            cl_event *       event)
111*c217d954SCole Faust     {
112*c217d954SCole Faust         if(this->_timer_enabled)
113*c217d954SCole Faust         {
114*c217d954SCole Faust             kernel_info       info;
115*c217d954SCole Faust             cl::Kernel        cpp_kernel(kernel, true);
116*c217d954SCole Faust             std::stringstream ss;
117*c217d954SCole Faust             ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
118*c217d954SCole Faust             if(gws != nullptr)
119*c217d954SCole Faust             {
120*c217d954SCole Faust                 ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
121*c217d954SCole Faust             }
122*c217d954SCole Faust             if(lws != nullptr)
123*c217d954SCole Faust             {
124*c217d954SCole Faust                 ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
125*c217d954SCole Faust             }
126*c217d954SCole Faust             info.name = ss.str();
127*c217d954SCole Faust             cl_event tmp;
128*c217d954SCole Faust             cl_int   retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
129*c217d954SCole Faust             info.event      = tmp;
130*c217d954SCole Faust             this->_kernels.push_back(std::move(info));
131*c217d954SCole Faust 
132*c217d954SCole Faust             if(event != nullptr)
133*c217d954SCole Faust             {
134*c217d954SCole Faust                 //return cl_event from the intercepted call
135*c217d954SCole Faust                 clRetainEvent(tmp);
136*c217d954SCole Faust                 *event = tmp;
137*c217d954SCole Faust             }
138*c217d954SCole Faust             return retval;
139*c217d954SCole Faust         }
140*c217d954SCole Faust         else
141*c217d954SCole Faust         {
142*c217d954SCole Faust             return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
143*c217d954SCole Faust         }
144*c217d954SCole Faust     };
145*c217d954SCole Faust     CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
146*c217d954SCole Faust 
147*c217d954SCole Faust #ifdef ARM_COMPUTE_GRAPH_ENABLED
148*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
149*c217d954SCole Faust     _real_graph_function = graph::TaskExecutor::get().execute_function;
150*c217d954SCole Faust     // Start intercepting tasks:
151*c217d954SCole Faust     auto task_interceptor = [this](graph::ExecutionTask & task)
152*c217d954SCole Faust     {
153*c217d954SCole Faust         if(task.node != nullptr && !task.node->name().empty())
154*c217d954SCole Faust         {
155*c217d954SCole Faust             this->_prefix = task.node->name() + "/";
156*c217d954SCole Faust         }
157*c217d954SCole Faust         else
158*c217d954SCole Faust         {
159*c217d954SCole Faust             this->_prefix = "";
160*c217d954SCole Faust         }
161*c217d954SCole Faust         this->_real_graph_function(task);
162*c217d954SCole Faust         this->_prefix = "";
163*c217d954SCole Faust     };
164*c217d954SCole Faust     graph::TaskExecutor::get().execute_function = task_interceptor;
165*c217d954SCole Faust #endif /* ARM_COMPUTE_GRAPH_ENABLED */
166*c217d954SCole Faust }
167*c217d954SCole Faust 
168*c217d954SCole Faust template <bool output_timestamps>
start()169*c217d954SCole Faust void           OpenCLClock<output_timestamps>::start()
170*c217d954SCole Faust {
171*c217d954SCole Faust     _kernels.clear();
172*c217d954SCole Faust     _timer_enabled = true;
173*c217d954SCole Faust }
174*c217d954SCole Faust template <bool output_timestamps>
stop()175*c217d954SCole Faust void           OpenCLClock<output_timestamps>::stop()
176*c217d954SCole Faust {
177*c217d954SCole Faust     _timer_enabled = false;
178*c217d954SCole Faust }
179*c217d954SCole Faust 
180*c217d954SCole Faust template <bool output_timestamps>
test_stop()181*c217d954SCole Faust void           OpenCLClock<output_timestamps>::test_stop()
182*c217d954SCole Faust {
183*c217d954SCole Faust     // Restore real function
184*c217d954SCole Faust     CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
185*c217d954SCole Faust     _real_function                              = nullptr;
186*c217d954SCole Faust #ifdef ARM_COMPUTE_GRAPH_ENABLED
187*c217d954SCole Faust     graph::TaskExecutor::get().execute_function = _real_graph_function;
188*c217d954SCole Faust     _real_graph_function                        = nullptr;
189*c217d954SCole Faust #endif /* ARM_COMPUTE_GRAPH_ENABLED */
190*c217d954SCole Faust }
191*c217d954SCole Faust 
192*c217d954SCole Faust template <bool              output_timestamps>
measurements() const193*c217d954SCole Faust Instrument::MeasurementsMap OpenCLClock<output_timestamps>::measurements() const
194*c217d954SCole Faust {
195*c217d954SCole Faust     MeasurementsMap measurements;
196*c217d954SCole Faust     unsigned int    kernel_number = 0;
197*c217d954SCole Faust     for(auto const &kernel : _kernels)
198*c217d954SCole Faust     {
199*c217d954SCole Faust         cl_ulong queued;
200*c217d954SCole Faust         cl_ulong flushed;
201*c217d954SCole Faust         cl_ulong start;
202*c217d954SCole Faust         cl_ulong end;
203*c217d954SCole Faust         kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
204*c217d954SCole Faust         kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
205*c217d954SCole Faust         kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
206*c217d954SCole Faust         kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
207*c217d954SCole Faust         std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
208*c217d954SCole Faust 
209*c217d954SCole Faust         if(output_timestamps)
210*c217d954SCole Faust         {
211*c217d954SCole Faust             measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
212*c217d954SCole Faust             measurements.emplace("[queued]" + name, Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
213*c217d954SCole Faust             measurements.emplace("[flushed]" + name, Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
214*c217d954SCole Faust             measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
215*c217d954SCole Faust         }
216*c217d954SCole Faust         else
217*c217d954SCole Faust         {
218*c217d954SCole Faust             measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
219*c217d954SCole Faust         }
220*c217d954SCole Faust     }
221*c217d954SCole Faust 
222*c217d954SCole Faust     return measurements;
223*c217d954SCole Faust }
224*c217d954SCole Faust 
225*c217d954SCole Faust template <bool              output_timestamps>
test_measurements() const226*c217d954SCole Faust Instrument::MeasurementsMap OpenCLClock<output_timestamps>::test_measurements() const
227*c217d954SCole Faust {
228*c217d954SCole Faust     MeasurementsMap measurements;
229*c217d954SCole Faust 
230*c217d954SCole Faust     if(output_timestamps)
231*c217d954SCole Faust     {
232*c217d954SCole Faust         // The OpenCL clock and the wall clock are not in sync, so we use
233*c217d954SCole Faust         // this trick to calculate the offset between the two clocks:
234*c217d954SCole Faust         ::cl::Event event;
235*c217d954SCole Faust         cl_ulong    now_gpu;
236*c217d954SCole Faust 
237*c217d954SCole Faust         // Enqueue retrieve current CPU clock and enqueue a dummy marker
238*c217d954SCole Faust         std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
239*c217d954SCole Faust         CLScheduler::get().queue().enqueueMarker(&event);
240*c217d954SCole Faust 
241*c217d954SCole Faust         CLScheduler::get().queue().finish();
242*c217d954SCole Faust         //Access the time at which the marker was enqueued:
243*c217d954SCole Faust         event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
244*c217d954SCole Faust 
245*c217d954SCole Faust         measurements.emplace("Now Wall clock", Measurement(now_cpu.time_since_epoch().count() / 1000, "us"));
246*c217d954SCole Faust         measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
247*c217d954SCole Faust     }
248*c217d954SCole Faust 
249*c217d954SCole Faust     return measurements;
250*c217d954SCole Faust }
251*c217d954SCole Faust 
252*c217d954SCole Faust } // namespace framework
253*c217d954SCole Faust } // namespace test
254*c217d954SCole Faust } // namespace arm_compute
255*c217d954SCole Faust 
256*c217d954SCole Faust template class arm_compute::test::framework::OpenCLClock<true>;
257*c217d954SCole Faust template class arm_compute::test::framework::OpenCLClock<false>;
258