xref: /aosp_15_r20/external/pytorch/test/inductor/test_torchinductor_codegen_dynamic_shapes.py (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1# Owner(s): ["module: inductor"]
2import importlib
3import os
4import sys
5
6import torch
7from torch._inductor.compile_fx import compile_fx
8from torch._inductor.test_case import TestCase
9from torch.testing._internal.common_utils import TEST_WITH_ASAN, TEST_WITH_ROCM
10from torch.testing._internal.inductor_utils import (
11    _check_has_dynamic_shape,
12    GPU_TYPE,
13    HAS_CPU,
14    HAS_GPU,
15)
16
17
18importlib.import_module("filelock")
19
20# Make the helper files in test/ importable
21pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
22sys.path.append(pytorch_test_dir)
23from inductor.test_torchinductor import (
24    CommonTemplate,
25    copy_tests,
26    run_and_get_cpp_code,
27    run_and_get_triton_code,
28    TestFailure,
29)
30from inductor.test_torchinductor_dynamic_shapes import (
31    make_dynamic_cls,
32    test_failures as dynamic_shapes_test_failures,
33)
34
35
36# Checks for patterns in generated C++/Triton code to see if it's dynamic
def check_codegen(
    self: TestCase,
    model,
    example_inputs,
    kwargs=None,
    *,
    is_cpp_code: bool,
):
    """Compile ``model`` through ``compile_fx`` and inspect the generated code.

    Args:
        self: The running test case; used for assertions.
        model: Callable (or module) to compile. When Triton code is being
            checked and the object has a ``to`` attribute, it is moved to
            ``GPU_TYPE`` first.
        example_inputs: Positional inputs passed to ``model``.
        kwargs: Optional keyword arguments passed to ``model``.
        is_cpp_code: When truthy, the generated C++ code is handed to
            ``_check_has_dynamic_shape``. Otherwise the generated Triton code
            must contain ``"def triton"``.

    Raises:
        AssertionError: If the generated code does not pass the check, or if
            the graph ran without ``compile_fx`` being invoked.
    """
    kwargs = kwargs or {}

    # Use plain truthiness so this branch agrees with the `if is_cpp_code:`
    # dispatch further down.
    if not is_cpp_code:
        if hasattr(model, "to"):
            model = model.to(device=GPU_TYPE)

        def copy_fn(x):
            # preserve strides of the input on the device
            if not isinstance(x, torch.Tensor):
                return x
            return torch.empty_strided(
                x.size(), x.stride(), device=GPU_TYPE, dtype=x.dtype
            ).copy_(x)

        example_inputs = tuple(copy_fn(x) for x in example_inputs)

    # Start from a clean slate so a previous compilation cannot be reused.
    torch._dynamo.reset()
    torch._inductor.codecache.FxGraphCache.clear()
    torch._inductor.metrics.reset()

    called = False

    def compile_fx_wrapper(model_, example_inputs_):
        # Record that the backend was really invoked.
        nonlocal called
        called = True
        return compile_fx(model_, example_inputs_)

    def run(*ex, **kw):
        return model(*ex, **kw)

    run = torch._dynamo.optimize(compile_fx_wrapper, nopython=True)(run)

    try:
        if is_cpp_code:
            _, code = run_and_get_cpp_code(run, *example_inputs, **kwargs)
            _check_has_dynamic_shape(self, code)
        else:
            code = run_and_get_triton_code(run, *example_inputs, **kwargs)
            self.assertTrue(
                "def triton" in code, f"Failed to find triton kernel\n{code}"
            )

        # assertTrue (unlike a bare assert) is not stripped under `python -O`.
        self.assertTrue(called, "Ran graph without calling compile_fx")
    finally:
        # Reset even when a check above fails (many tests here are expected
        # failures), so later tests do not inherit this compilation state.
        torch._dynamo.reset()
87
88
89# xfail by default, set is_skip=True to skip
# Maps a generated test name to a TestFailure whose first argument is the
# tuple of suffixes (e.g. "cpu", "cuda", "xpu") the entry applies to.
# `dynamic_shapes_test_failures` is unpacked last, so its entries win over
# same-named entries listed here.
test_failures = {
    #
    # Failed to find dynamic for loop variable (no kernels generated)
    #
    "test_fft_real_input_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_fft_real_input_real_output_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_to_device_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    #
    # Failed to find dynamic for loop variable:
    #
    "test_arange1_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange2_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange3_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange4_dynamic_shapes": TestFailure(("cpu",)),
    "test_arange6_dynamic_shapes": TestFailure(("cpu",)),
    "test_clamp_type_promotion_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv2d_channels_last_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv3d_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv3d_channels_last_dynamic_shapes": TestFailure(("cpu",)),
    "test_mutable_custom_op_fixed_layout2_dynamic_shapes": TestFailure(("cpu",)),
    "test_expand_dynamic_shapes": TestFailure(("cpu",)),
    "test_full_boolean_dynamic_shapes": TestFailure(("cpu",)),
    "test_glu_dynamic_shapes": TestFailure(("cpu",)),
    "test_isinf2_dynamic_shapes": TestFailure(("cpu",)),
    "test_linspace1_dynamic_shapes": TestFailure(("cpu",)),
    "test_masked_scatter_dynamic_shapes": TestFailure(("cpu",)),
    "test_stack_dynamic_shapes": TestFailure(("cpu",)),
    "test_tensor2_dynamic_shapes": TestFailure(("cpu",)),
    "test_tensor3_dynamic_shapes": TestFailure(("cpu",)),
    "test_to_device_constant_dynamic_shapes": TestFailure(("cpu",)),
    "test_upsample_nearest2d_backward_dynamic_shapes": TestFailure(("cpu",)),
    "test_views3_dynamic_shapes": TestFailure(("cpu",)),
    "test_views4_dynamic_shapes": TestFailure(("cpu",)),
    "test_zeros_dynamic_shapes": TestFailure(("cpu",)),
    "test_uint_dynamic_shapes": TestFailure(("cpu",)),
    "test_issue102546_dynamic_shapes": TestFailure(("cpu",)),
    "test_repeat_as_strided_dynamic_shapes": TestFailure(("cpu",)),
    "test_mul_index_expr_dynamic_shapes": TestFailure(("cpu",)),
    "test_flip_cat_dynamic_shapes": TestFailure(("cpu",)),
    #
    # Failed to find for loop/triton kernel:
    #
    "test_complex_fallback_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_adaptive_avg_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_adaptive_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_fractional_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_argmax_to_float_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_avg_pool2d7_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_avg_pool2d_backward4_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_avg_pool3d_backward4_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_baddbmm_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_bmm2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_both_scalars_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_compar_dynamic_shapes": TestFailure(("cpu",)),
    "test_const_int32_to_float_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_conv2d_backward_channels_last_dynamic_shapes": TestFailure(("cpu",)),
    "test_conv_backward_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_conv_functional_bn_fuse_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    "test_convolution2_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumprod_zero_dim_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumsum_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumsum_no_mask_dynamic_shapes": TestFailure(("cpu",)),
    "test_cumsum_zero_dim_dynamic_shapes": TestFailure(("cpu",)),
    "test_div8_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_embedding_bag_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_empty1_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_empty2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_empty_strided_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_bucketize_dynamic_shapes": TestFailure(("cpu",)),
    "test_bucketize_default_kwargs_dynamic_shapes": TestFailure(("cpu",)),
    "test_bucketize_int_dynamic_shapes": TestFailure(("cpu",)),
    "test_like_rands_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_linspace2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_linspace3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_logcumsumexp_dynamic_shapes": TestFailure(("cpu",)),
    "test_logcumsumexp_zero_dim_dynamic_shapes": TestFailure(("cpu",)),
    "test_max_pool2d6_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_max_pool2d8_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_max_pool2d_with_indices_backward5_dynamic_shapes": TestFailure(
        ("cpu", "cuda")
    ),
    "test_max_pool2d_with_indices_backward6_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu")
    ),
    "test_misaligned_address_issue1_dynamic_shapes": TestFailure(("cpu",)),
    "test_mm_views_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_new_empty_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_new_empty_strided_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_new_ones_dynamic_shapes": TestFailure(("cpu",)),
    "test_permute2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_pointwise_airy_ai_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_digamma_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_gammainc_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_gammaincc_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_i0e_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_i1e_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_modified_bessel_k0_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_modified_bessel_k1_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_ndtri_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_polygamma_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_psi_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_scaled_modified_bessel_k0_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_scaled_modified_bessel_k1_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_spherical_bessel_j0_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_zeta_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_chebyshev_polynomial_t_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_chebyshev_polynomial_u_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_chebyshev_polynomial_v_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_chebyshev_polynomial_w_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_t_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_u_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_v_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_shifted_chebyshev_polynomial_w_dynamic_shapes": TestFailure(
        ("cuda", "xpu")
    ),
    "test_pointwise_hermite_polynomial_h_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_hermite_polynomial_he_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_laguerre_polynomial_l_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_pointwise_legendre_polynomial_p_dynamic_shapes": TestFailure(("cuda", "xpu")),
    "test_polar_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_randn_generator_dynamic_shapes": TestFailure(("cpu",)),
    "test_randn_like_empty_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_single_elem_dynamic_shapes": TestFailure(("cpu",)),
    "test_single_elem_indirect_dynamic_shapes": TestFailure(("cpu",)),
    "test_sort_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_sort_stable_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_sort_transpose_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_split_cumsum_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_cumsum_low_prec_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_cumprod_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_cumprod_low_prec_dynamic_shapes": TestFailure(("cpu",)),
    "test_split_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_topk_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_unbind_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_views5_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_view_detach_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_view_on_aliased_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_linear_float64_dynamic_shapes": TestFailure(("cpu",)),
    "test_adaptive_avg_pool_with_output_size_0_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu")
    ),
    "test_zero_element_mutation_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_custom_op_3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_custom_op_fixed_layout_sequential_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu")
    ),
    "test_cat_uint8_dynamic_shapes": TestFailure(
        ("cpu",)
    ),  # cat on uint8 input is using aten fallback on cpu
    #
    # Tests not using 'common' or directly calling 'assertEqual':
    #
    "test_arange5_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_cat_inplace_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_cat_of_loops_and_extern_kernel_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    # need to enable CL with dynamic shapes
    "test_scaled_dot_product_efficient_attention_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout_deterministic_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_dtype_mismatch_issue_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_forced_buffer_realize_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_tmp_not_defined_issue3_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    "test_gather2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_inplace_add_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_inplace_mixed_dtype_ops_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation1_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation2_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation3_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_input_mutation4_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_kernel_names_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_lerp_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_linear_buffer_reuse_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_list_clearing_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout_trivial_1_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_dropout2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_dropout3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_masked_fill_promotion_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_min_max_reduction_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_multi_gpu_recompile_on_index_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_output_strides_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_pow3_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
    "test_profiler_mark_wrapper_call_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_rand_like_deterministic_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_repeat_interleave_2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_slice_mutation2_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_strided_inputs_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_transposed_propagates_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_require_stride_expanded_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_unspec_inputs_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_zero_dim_reductions_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    "test_sdpa_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    "test_sdpa_unaligned_mask_dynamic_shapes": TestFailure(("cpu",), is_skip=True),
    #
    # The following tests do not support dynamic shapes yet:
    #
    "test_cudnn_rnn_dynamic_shapes": TestFailure(("cuda",)),
    # test_roi_align uses torchvision, which doesn't work with dynamic shapes
    "test_roi_align_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
    "test_aliased_buffer_reuse_dynamic_shapes": TestFailure(("cpu",)),
    # The input of this case has only 1 element
    "test_mutations_loop_fusion_dynamic_shapes": TestFailure(
        ("cpu", "cuda", "xpu"), is_skip=True
    ),
    # Refinement means we don't actually generate dynamic shapes (but only on
    # cpu apparently?!)
    "test_nonzero_unbacked_refinement_dynamic_shapes": TestFailure(("cpu",)),
    **dynamic_shapes_test_failures,
}
376
if TEST_WITH_ROCM:
    # Under ROCm, re-register the split cumsum/cumprod tests so that their
    # TestFailure entries list both the "cpu" and "cuda" suffixes.
    test_failures.update(
        {
            rocm_test_name: TestFailure(("cpu", "cuda"))
            for rocm_test_name in (
                "test_split_cumsum_dynamic_shapes",
                "test_split_cumsum_low_prec_dynamic_shapes",
                "test_split_cumprod_dynamic_shapes",
                "test_split_cumprod_low_prec_dynamic_shapes",
            )
        }
    )
386
# Template class whose tests are copied into the device-specific test classes
# defined below in this file.
# NOTE(review): xfail_prop presumably names the attribute make_dynamic_cls
# uses to mark expected failures -- confirm against its definition.
DynamicShapesCodegenCommonTemplate = make_dynamic_cls(
    CommonTemplate,
    xfail_prop="_expected_failure_codegen_dynamic",
)
390
391
if HAS_CPU:

    class DynamicShapesCodegenCpuTests(TestCase):
        """CPU test class populated by ``copy_tests``; checks generated C++ code."""

        maxDiff = None
        device = "cpu"

        def common(self: TestCase, model, example_inputs, kwargs=None, **_rest):
            """Route the check to ``check_codegen`` in C++ mode.

            Any extra keyword arguments are accepted and ignored.
            """
            return check_codegen(
                self, model, example_inputs, kwargs, is_cpp_code=True
            )

    # Copy the template's tests onto the CPU class, applying test_failures
    # for the "cpu" suffix.
    copy_tests(
        DynamicShapesCodegenCommonTemplate,
        DynamicShapesCodegenCpuTests,
        "cpu",
        test_failures,
    )
413
414
if HAS_GPU and not TEST_WITH_ASAN:

    class DynamicShapesCodegenGPUTests(TestCase):
        """GPU test class populated by ``copy_tests``; checks generated Triton code."""

        maxDiff = None
        device = GPU_TYPE

        def common(self: TestCase, model, example_inputs, kwargs=None, **_rest):
            """Route the check to ``check_codegen`` in Triton mode.

            Any extra keyword arguments are accepted and ignored.
            """
            return check_codegen(
                self, model, example_inputs, kwargs, is_cpp_code=False
            )

    # Copy the template's tests onto the GPU class, applying test_failures
    # for the GPU_TYPE suffix.
    copy_tests(
        DynamicShapesCodegenCommonTemplate,
        DynamicShapesCodegenGPUTests,
        GPU_TYPE,
        test_failures,
    )
436
437
if __name__ == "__main__":
    from torch._inductor.test_case import run_tests

    # Only launch the runner when at least one backend is available.
    any_backend_available = HAS_CPU or HAS_GPU
    if any_backend_available:
        run_tests(needs="filelock")
443