# Owner(s): ["module: inductor"]
import importlib
import os
import sys

import torch
from torch._inductor.compile_fx import compile_fx
from torch._inductor.test_case import TestCase
from torch.testing._internal.common_utils import TEST_WITH_ASAN, TEST_WITH_ROCM
from torch.testing._internal.inductor_utils import (
    _check_has_dynamic_shape,
    GPU_TYPE,
    HAS_CPU,
    HAS_GPU,
)


importlib.import_module("filelock")

# Make the helper files in test/ importable
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(pytorch_test_dir)
from inductor.test_torchinductor import (
    CommonTemplate,
    copy_tests,
    run_and_get_cpp_code,
    run_and_get_triton_code,
    TestFailure,
)
from inductor.test_torchinductor_dynamic_shapes import (
    make_dynamic_cls,
    test_failures as dynamic_shapes_test_failures,
)


def check_codegen(
    self: TestCase,
    model,
    example_inputs,
    kwargs=None,
    *,
    is_cpp_code: bool,
):
    """Compile ``model`` through inductor and inspect the generated code.

    Checks for patterns in the generated C++/Triton code to see if it is
    dynamic.

    Args:
        self: The running test case; used for its assertion helpers.
        model: Callable (or module) to compile. When the Triton path is
            taken and the object has a ``to`` attribute, it is moved to
            ``GPU_TYPE`` first.
        example_inputs: Positional inputs passed to ``model``. On the Triton
            path, tensor inputs are copied to ``GPU_TYPE`` with their sizes
            and strides preserved; non-tensor inputs are passed unchanged.
        kwargs: Optional keyword arguments forwarded to ``model``.
        is_cpp_code: ``True`` to collect C++ code and hand it to
            ``_check_has_dynamic_shape``; ``False`` to collect Triton code
            and require that it contains ``"def triton"``.

    Raises:
        AssertionError: If the generated code fails its check, or if the
            graph ran without ``compile_fx`` being invoked.
    """
    kwargs = kwargs or {}

    if not is_cpp_code:
        if hasattr(model, "to"):
            model = model.to(device=GPU_TYPE)

        def copy_fn(x):
            # preserve strides of the input on the device
            if not isinstance(x, torch.Tensor):
                return x
            return torch.empty_strided(
                x.size(), x.stride(), device=GPU_TYPE, dtype=x.dtype
            ).copy_(x)

        example_inputs = tuple(copy_fn(x) for x in example_inputs)

    # Start from a clean slate so that compilation actually happens for this
    # call instead of being served from earlier state.
    torch._dynamo.reset()
    torch._inductor.codecache.FxGraphCache.clear()
    torch._inductor.metrics.reset()

    called = False

    def compile_fx_wrapper(model_, example_inputs_):
        # Record that the backend was really invoked before delegating.
        nonlocal called
        called = True
        return compile_fx(model_, example_inputs_)

    def run(*ex, **kw):
        return model(*ex, **kw)

    run = torch._dynamo.optimize(compile_fx_wrapper, nopython=True)(run)

    try:
        if is_cpp_code:
            _, code = run_and_get_cpp_code(run, *example_inputs, **kwargs)
            _check_has_dynamic_shape(self, code)
        else:
            code = run_and_get_triton_code(run, *example_inputs, **kwargs)
            self.assertTrue(
                "def triton" in code, f"Failed to find triton kernel\n{code}"
            )

        # Use the test-case assertion rather than a bare ``assert`` so the
        # check is not stripped when Python runs with -O.
        self.assertTrue(called, "Ran graph without calling compile_fx")
    finally:
        # Always drop dynamo state, including when a check above fails (many
        # of these tests are expected failures).
        torch._dynamo.reset()


# xfail by default, set is_skip=True to skip
test_failures = {
    #
    # Failed to find dynamic for loop variable (no kernels generated)
    #
    "test_fft_real_input_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_fft_real_input_real_output_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_to_device_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    #
    # Failed to find dynamic for loop variable:
    #
    "test_arange1_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange2_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange3_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange4_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange6_dynamic_shapes": TestFailure(("cpu",)),
    "test_clamp_type_promotion_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv2d_channels_last_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv3d_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv3d_channels_last_dynamic_shapes": TestFailure(("cpu",)),
    "test_mutable_custom_op_fixed_layout2_dynamic_shapes": TestFailure(("cpu",)),
    "test_expand_dynamic_shapes": TestFailure(("cpu",)),
    "test_full_boolean_dynamic_shapes": TestFailure(("cpu",)),
    "test_glu_dynamic_shapes": TestFailure(("cpu",)),
    "test_isinf2_dynamic_shapes": TestFailure(("cpu",)),
    "test_linspace1_dynamic_shapes": TestFailure(("cpu",)),
    "test_masked_scatter_dynamic_shapes": TestFailure(("cpu",)),
    "test_stack_dynamic_shapes": TestFailure(("cpu",)),
    "test_tensor2_dynamic_shapes": TestFailure(("cpu",)),
    "test_tensor3_dynamic_shapes": TestFailure(("cpu",)),
    "test_to_device_constant_dynamic_shapes": TestFailure(("cpu",)),
    "test_upsample_nearest2d_backward_dynamic_shapes": TestFailure(("cpu",)),
    "test_views3_dynamic_shapes": TestFailure(("cpu",)),
    "test_views4_dynamic_shapes": TestFailure(("cpu",)),
    "test_zeros_dynamic_shapes": TestFailure(("cpu",)),
    "test_uint_dynamic_shapes": TestFailure(("cpu",)),
    "test_issue102546_dynamic_shapes": TestFailure(("cpu",)),
    "test_repeat_as_strided_dynamic_shapes": TestFailure(("cpu",)),
    "test_mul_index_expr_dynamic_shapes": TestFailure(("cpu",)),
    "test_flip_cat_dynamic_shapes": TestFailure(("cpu",)),
    #
    # Failed to find for loop/triton kernel:
    #
    "test_complex_fallback_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_adaptive_avg_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_adaptive_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_fractional_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_argmax_to_float_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_avg_pool2d7_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_avg_pool2d_backward4_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_avg_pool3d_backward4_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_baddbmm_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_bmm2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_both_scalars_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_compar_dynamic_shapes": TestFailure(("cpu",)),
    "test_const_int32_to_float_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_conv2d_backward_channels_last_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv_backward_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_conv_functional_bn_fuse_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    "test_convolution2_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumprod_zero_dim_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumsum_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumsum_no_mask_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumsum_zero_dim_dynamic_shapes": TestFailure(("cpu",)),
    "test_div8_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_embedding_bag_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_empty1_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_empty2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_empty_strided_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_bucketize_dynamic_shapes": TestFailure(("cpu",)),
    "test_bucketize_default_kwargs_dynamic_shapes": TestFailure(("cpu",)),
    "test_bucketize_int_dynamic_shapes": TestFailure(("cpu",)),
    "test_like_rands_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_linspace2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_linspace3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_logcumsumexp_dynamic_shapes": TestFailure(("cpu",)),
    "test_logcumsumexp_zero_dim_dynamic_shapes": TestFailure(("cpu",)),
    "test_max_pool2d6_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_max_pool2d8_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_max_pool2d_with_indices_backward5_dynamic_shapes": TestFailure(
        ("cpu", "cuda")
    ),
    "test_max_pool2d_with_indices_backward6_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu")
    ),
    "test_misaligned_address_issue1_dynamic_shapes": TestFailure(("cpu",)),
    "test_mm_views_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_new_empty_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_new_empty_strided_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_new_ones_dynamic_shapes": TestFailure(("cpu",)),
    "test_permute2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_pointwise_airy_ai_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_digamma_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_gammainc_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_gammaincc_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_i0e_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_i1e_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_modified_bessel_k0_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_modified_bessel_k1_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_ndtri_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_polygamma_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_psi_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_scaled_modified_bessel_k0_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_scaled_modified_bessel_k1_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_spherical_bessel_j0_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_zeta_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_chebyshev_polynomial_t_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_chebyshev_polynomial_u_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_chebyshev_polynomial_v_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_chebyshev_polynomial_w_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_t_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_u_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_v_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_w_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_hermite_polynomial_h_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_hermite_polynomial_he_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_laguerre_polynomial_l_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_legendre_polynomial_p_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_polar_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_randn_generator_dynamic_shapes": TestFailure(("cpu",)),
    "test_randn_like_empty_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_single_elem_dynamic_shapes": TestFailure(("cpu",)),
    "test_single_elem_indirect_dynamic_shapes": TestFailure(("cpu",)),
    "test_sort_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_sort_stable_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_sort_transpose_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_split_cumsum_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_cumsum_low_prec_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_cumprod_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_cumprod_low_prec_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_topk_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_unbind_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_views5_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_view_detach_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_view_on_aliased_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_linear_float64_dynamic_shapes": TestFailure(("cpu",)),
    "test_adaptive_avg_pool_with_output_size_0_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu")
    ),
    "test_zero_element_mutation_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_custom_op_3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_custom_op_fixed_layout_sequential_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu")
    ),
    "test_cat_uint8_dynamic_shapes": TestFailure(
        ("cpu",)
    ),  # cat on uint8 input is using aten fallback on cpu
    #
    # Tests not using 'common' or directly calling 'assertEqual':
    #
    "test_arange5_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_cat_inplace_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_cat_of_loops_and_extern_kernel_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    # need to enable CL with dynamic shapes
    "test_scaled_dot_product_efficient_attention_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout_deterministic_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_dtype_mismatch_issue_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_forced_buffer_realize_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_tmp_not_defined_issue3_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    "test_gather2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_inplace_add_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_inplace_mixed_dtype_ops_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation1_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation2_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation3_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation4_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_kernel_names_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_lerp_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_linear_buffer_reuse_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_list_clearing_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout_trivial_1_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_dropout3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_masked_fill_promotion_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_min_max_reduction_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_multi_gpu_recompile_on_index_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_output_strides_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_pow3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_profiler_mark_wrapper_call_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_rand_like_deterministic_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_repeat_interleave_2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_slice_mutation2_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_strided_inputs_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_transposed_propagates_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_require_stride_expanded_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_unspec_inputs_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_zero_dim_reductions_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_sdpa_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    "test_sdpa_unaligned_mask_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    #
    # The following tests do not support dynamic shapes yet:
    #
    "test_cudnn_rnn_dynamic_shapes": TestFailure(("cuda",)),
    # test_roi_align uses torchvision, which doesn't work with dynamic shapes
    "test_roi_align_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_aliased_buffer_reuse_dynamic_shapes": TestFailure(("cpu",)),
    # The input of this case has only 1 element
    "test_mutations_loop_fusion_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    # Refinement means we don't actually generate dynamic shapes (but only on
    # cpu apparently?!)
    "test_nonzero_unbacked_refinement_dynamic_shapes": TestFailure(("cpu",)),
    **dynamic_shapes_test_failures,
}

if TEST_WITH_ROCM:
    # Under ROCm these entries are replaced so that they also list the
    # "cuda" suffix in addition to "cpu".
    test_failures.update(
        {
            "test_split_cumsum_dynamic_shapes": TestFailure(("cpu", "cuda")),
            "test_split_cumsum_low_prec_dynamic_shapes": TestFailure(("cpu", "cuda")),
            "test_split_cumprod_dynamic_shapes": TestFailure(("cpu", "cuda")),
            "test_split_cumprod_low_prec_dynamic_shapes": TestFailure(("cpu", "cuda")),
        }
    )

DynamicShapesCodegenCommonTemplate = make_dynamic_cls(
    CommonTemplate, xfail_prop="_expected_failure_codegen_dynamic"
)


if HAS_CPU:

    class DynamicShapesCodegenCpuTests(TestCase):
        """Dynamic-shape codegen tests whose ``common`` inspects C++ code."""

        maxDiff = None
        device = "cpu"

        def common(self: TestCase, model, example_inputs, kwargs=None, **_rest):
            """Run ``check_codegen`` on the C++ path; extra keywords are ignored."""
            return check_codegen(
                self=self,
                model=model,
                example_inputs=example_inputs,
                kwargs=kwargs,
                is_cpp_code=True,
            )

    copy_tests(
        DynamicShapesCodegenCommonTemplate,
        DynamicShapesCodegenCpuTests,
        "cpu",
        test_failures,
    )


if HAS_GPU and not TEST_WITH_ASAN:

    class DynamicShapesCodegenGPUTests(TestCase):
        """Dynamic-shape codegen tests whose ``common`` inspects Triton code."""

        maxDiff = None
        device = GPU_TYPE

        def common(self: TestCase, model, example_inputs, kwargs=None, **_rest):
            """Run ``check_codegen`` on the Triton path; extra keywords are ignored."""
            return check_codegen(
                self=self,
                model=model,
                example_inputs=example_inputs,
                kwargs=kwargs,
                is_cpp_code=False,
            )

    copy_tests(
        DynamicShapesCodegenCommonTemplate,
        DynamicShapesCodegenGPUTests,
        GPU_TYPE,
        test_failures,
    )


if __name__ == "__main__":
    from torch._inductor.test_case import run_tests

    if HAS_CPU or HAS_GPU:
        run_tests(needs="filelock")