from enum import Enum
from typing import Any, Literal
from typing_extensions import TypeAlias

from torch._C import device, dtype, layout

# Type stubs for the profiler bindings; the objects declared here are
# defined in torch/csrc/profiler/python/init.cpp
#
# Enum member values are elided (`...`) throughout this stub: only the member
# names are declared.

# Scope categories; this is the return type of `_ExtraFields_TorchOp.scope`.
class RecordScope(Enum):
    FUNCTION = ...
    BACKWARD_FUNCTION = ...
    TORCHSCRIPT_FUNCTION = ...
    KERNEL_FUNCTION_DTYPE = ...
    CUSTOM_CLASS = ...
    BUILD_FEATURE = ...
    LITE_INTERPRETER = ...
    USER_SCOPE = ...
    STATIC_RUNTIME_OP = ...
    STATIC_RUNTIME_MODEL = ...

# Profiler state selector; passed as the `state` argument of `ProfilerConfig`.
class ProfilerState(Enum):
    Disable = ...
    CPU = ...
    CUDA = ...
    NVTX = ...
    ITT = ...
    KINETO = ...
    KINETO_GPU_FALLBACK = ...
    KINETO_PRIVATEUSE1_FALLBACK = ...
    KINETO_PRIVATEUSE1 = ...

# Enumeration of profiler types. No other declaration visible in this stub
# refers to it.
class ActiveProfilerType(Enum):
    NONE = ...
    LEGACY = ...
    KINETO = ...
    NVTX = ...
    ITT = ...

# Enumeration of profiler activities.
# NOTE(review): the member names look like device/backend kinds -- confirm the
# exact semantics against init.cpp.
class ProfilerActivity(Enum):
    CPU = ...
    CUDA = ...
    XPU = ...
    MTIA = ...
    PrivateUse1 = ...

# Tag type for `_ProfilerEvent`. Each member name matches the suffix of one
# `_ExtraFields_<Name>` class, and `_ProfilerEvent.typed` pairs every tag with
# that class.
class _EventType(Enum):
    TorchOp = ...
    Backend = ...
    Allocation = ...
    OutOfMemory = ...
    PyCall = ...
    PyCCall = ...
    Kineto = ...

# Optional profiler settings; an instance is the last argument of
# `ProfilerConfig`.
class _ExperimentalConfig:
    # Every parameter has a default (elided as `...` in this stub), so the
    # constructor may be called with no arguments.
    def __init__(
        self,
        profiler_metrics: list[str] = ...,
        profiler_measure_per_kernel: bool = ...,
        verbose: bool = ...,
        performance_events: list[str] = ...,
        enable_cuda_sync_events: bool = ...,
    ) -> None: ...

# Profiler configuration object.
class ProfilerConfig:
    # No parameter declares a default: all seven arguments are required.
    def __init__(
        self,
        state: ProfilerState,
        report_input_shapes: bool,
        profile_memory: bool,
        with_stack: bool,
        with_flops: bool,
        with_modules: bool,
        experimental_config: _ExperimentalConfig,
    ) -> None: ...

# One profiler event. Events reference each other through `children` (a list
# of events) and `parent` (an event or None).
class _ProfilerEvent:
    # Declared as plain attributes (not properties).
    start_tid: int
    start_time_ns: int
    children: list[_ProfilerEvent]

    # TODO(robieta): remove in favor of `self.typed`
    # Untagged union of every `_ExtraFields_*` class declared in this stub.
    extra_fields: (
        _ExtraFields_TorchOp
        | _ExtraFields_Backend
        | _ExtraFields_Allocation
        | _ExtraFields_OutOfMemory
        | _ExtraFields_PyCall
        | _ExtraFields_PyCCall
        | _ExtraFields_Kineto
    )

    # Tagged form of the extra fields: a 2-tuple whose first element is a
    # `Literal` `_EventType` member and whose second element is the matching
    # `_ExtraFields_*` class, so a type checker can narrow the second element
    # by comparing the first.
    @property
    def typed(
        self,
    ) -> (
        tuple[Literal[_EventType.TorchOp], _ExtraFields_TorchOp]
        | tuple[Literal[_EventType.Backend], _ExtraFields_Backend]
        | tuple[Literal[_EventType.Allocation], _ExtraFields_Allocation]
        | tuple[Literal[_EventType.OutOfMemory], _ExtraFields_OutOfMemory]
        | tuple[Literal[_EventType.PyCall], _ExtraFields_PyCall]
        | tuple[Literal[_EventType.PyCCall], _ExtraFields_PyCCall]
        | tuple[Literal[_EventType.Kineto], _ExtraFields_Kineto]
    ): ...
    # The remaining members are declared as properties without setters.
    @property
    def name(self) -> str: ...
    @property
    def tag(self) -> _EventType: ...
    @property
    def id(self) -> int: ...
    # None is a possible value (presumably for an event with no parent --
    # TODO confirm).
    @property
    def parent(self) -> _ProfilerEvent | None: ...
    @property
    def correlation_id(self) -> int: ...
    @property
    def end_time_ns(self) -> int: ...
    @property
    def duration_time_ns(self) -> int: ...

# Tensor description: layout, device, dtype, sizes and strides, plus
# identifiers that may each be None.
class _TensorMetadata:
    impl_ptr: int | None
    storage_data_ptr: int | None
    id: int | None

    @property
    def allocation_id(self) -> int | None: ...
    # NOTE(review): the three properties below share their names with the
    # `layout` / `device` / `dtype` types imported from torch._C; each return
    # annotation is presumably meant to name the imported type, not the
    # property -- confirm if a type checker complains.
    @property
    def layout(self) -> layout: ...
    @property
    def device(self) -> device: ...
    @property
    def dtype(self) -> dtype: ...
    @property
    def sizes(self) -> list[int]: ...
    @property
    def strides(self) -> list[int]: ...

# `Scalar` is any of the four Python numeric/bool types.
# `Input` is the element type of `_ExtraFields_TorchOp.inputs`: a single tensor
# description, a list of tensor descriptions, a scalar, or None.
Scalar: TypeAlias = int | float | bool | complex
Input: TypeAlias = _TensorMetadata | list[_TensorMetadata] | Scalar | None

# Extra fields paired with `_EventType.TorchOp` in `_ProfilerEvent.typed`.
class _ExtraFields_TorchOp:
    name: str
    sequence_number: int
    allow_tf32_cublas: bool

    @property
    def inputs(self) -> list[Input]: ...
    @property
    def scope(self) -> RecordScope: ...

# Extra fields paired with `_EventType.Backend`; no members are declared in
# this stub.
class _ExtraFields_Backend: ...

# Extra fields paired with `_EventType.Allocation`.
class _ExtraFields_Allocation:
    ptr: int
    id: int | None
    alloc_size: int
    total_allocated: int
    total_reserved: int

    @property
    def allocation_id(self) -> int | None: ...
    @property
    def device(self) -> device: ...

# Extra fields paired with `_EventType.OutOfMemory`; no members are declared in
# this stub.
class _ExtraFields_OutOfMemory: ...

# Source location record: line number, function name and file name. Used as
# the type of `caller` / `callsite` on the PyCall and PyCCall extra fields.
class _PyFrameState:
    line_number: int
    function_name: str

    @property
    def file_name(self) -> str: ...

# Module information; the type of `_ExtraFields_PyCall.module`.
class _NNModuleInfo:
    @property
    def self_ptr(self) -> int: ...
    @property
    def cls_ptr(self) -> int: ...
    @property
    def cls_name(self) -> str: ...
    # Each entry is a (str, _TensorMetadata, _TensorMetadata | None) triple --
    # presumably (name, parameter, gradient-or-None); TODO confirm against
    # init.cpp.
    @property
    def parameters(
        self,
    ) -> list[tuple[str, _TensorMetadata, _TensorMetadata | None]]: ...

# Optimizer information; the type of `_ExtraFields_PyCall.optimizer`.
class _OptimizerInfo:
    # One triple per entry; the inline comments below label each position.
    @property
    def parameters(
        self,
    ) -> list[
        tuple[
            # Parameter
            _TensorMetadata,
            #
            # Gradient (if present during optimizer.step())
            _TensorMetadata | None,
            #
            # Optimizer state for Parameter as (name, tensor) pairs
            list[tuple[str, _TensorMetadata]],
        ]
    ]: ...

# Extra fields paired with `_EventType.PyCCall`; exposes only `caller`.
class _ExtraFields_PyCCall:
    @property
    def caller(self) -> _PyFrameState: ...

# Extra fields paired with `_EventType.PyCall`.
class _ExtraFields_PyCall:
    @property
    def callsite(self) -> _PyFrameState: ...
    @property
    def caller(self) -> _PyFrameState: ...
    # `module` and `optimizer` may each be None.
    @property
    def module(self) -> _NNModuleInfo | None: ...
    @property
    def optimizer(self) -> _OptimizerInfo | None: ...

# Extra fields paired with `_EventType.Kineto`; no members are declared in this
# stub.
class _ExtraFields_Kineto: ...

# Execution-trace observer controls.
# `_add_execution_trace_observer` takes the output file path; its bool result
# presumably reports whether the observer was added -- TODO confirm.
def _add_execution_trace_observer(output_file_path: str) -> bool: ...
def _remove_execution_trace_observer() -> None: ...
def _enable_execution_trace_observer() -> None: ...
def _disable_execution_trace_observer() -> None: ...
# Setters for boolean profiler switches; each takes the new value and returns
# nothing.
def _set_record_concrete_inputs_enabled_val(val: bool) -> None: ...
def _set_fwd_bwd_enabled_val(val: bool) -> None: ...
def _set_cuda_sync_enabled_val(val: bool) -> None: ...

# Opaque traceback handle: produced by `gather_traceback` and consumed by
# `symbolize_tracebacks`. No members are declared in this stub.
class CapturedTraceback: ...

# All three flags are required.
# NOTE(review): they presumably select which kinds of frames (Python,
# TorchScript, C++) are captured -- confirm against init.cpp.
def gather_traceback(python: bool, script: bool, cpp: bool) -> CapturedTraceback: ...

# Takes a list of captured tracebacks and returns a list of lists of dicts
# (presumably one inner list per input traceback -- TODO confirm).
# Each dict has the keys name, filename, line
def symbolize_tracebacks(
    to_symbolize: list[CapturedTraceback],
) -> list[list[dict[str, str]]]: ...

# Declares the context-manager protocol (`__enter__` / `__exit__`).
class _RecordFunctionFast:
    # `name` is required; `input_values` and `keyword_values` default to None.
    def __init__(
        self,
        name: str,
        input_values: list | tuple | None = None,
        keyword_values: dict | None = None,
    ) -> None: ...
    # Returns None, so `with _RecordFunctionFast(...) as x` binds x to None.
    def __enter__(self) -> None: ...
    # Accepts any positional arguments and returns None.
    def __exit__(self, *args: Any) -> None: ...