/aosp_15_r20/external/pytorch/aten/src/ATen/native/transformers/cuda/ |
H A D | attention_backward.cu | 75 const Tensor& philox_offset, in _flash_attention_backward() argument 137 philox_offset); in _flash_attention_backward() 159 philox_offset); in _flash_attention_backward() 175 const Tensor& philox_offset, in _scaled_dot_product_cudnn_attention_backward_cuda() argument 243 philox_offset/*Tensor& dropoutoffset*/); in _scaled_dot_product_cudnn_attention_backward_cuda() 268 const at::Tensor& philox_offset, // offset into random number sequence in _efficient_attention_backward() argument 401 *philox_offset.data_ptr<int64_t>()); in _efficient_attention_backward() 405 philox_offset.data_ptr<int64_t>(), in _efficient_attention_backward() 463 mk_aoscalartensor(philox_offset), in _efficient_attention_backward() 746 const at::Tensor& philox_offset, in _scaled_dot_product_flash_attention_backward_cuda() argument [all …]
|
H A D | attention.cu | 716 philox_offset, in _scaled_dot_product_flash_attention_cuda() 735 …sor(), Tensor(), max_seqlen_batch_q, max_seqlen_batch_k, philox_seed, philox_offset, debug_attn_ma… in _scaled_dot_product_flash_attention_cuda() 879 philox_seed, philox_offset, debug_attn_mask; in _flash_attention_forward() local 888 philox_offset, in _flash_attention_forward() 917 philox_offset, in _flash_attention_forward() 939 std::move(philox_offset), in _flash_attention_forward()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/xpu/ |
H A D | XPUGeneratorImpl.cpp | 133 uint64_t philox_offset; in set_state() local 134 memcpy(&philox_offset, new_rng_state + seed_size, offset_size); in set_state() 135 this->set_philox_offset_per_thread(philox_offset); in set_state()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/nested/cuda/ |
H A D | NestedTensorTransformerFunctions.cpp | 251 philox_offset, in _scaled_dot_product_flash_attention_nestedtensor_cuda() 277 philox_offset, in _scaled_dot_product_flash_attention_nestedtensor_cuda() 340 const at::Tensor& philox_offset, in _scaled_dot_product_flash_attention_backward_nested() argument 376 philox_offset, in _scaled_dot_product_flash_attention_backward_nested()
|
/aosp_15_r20/external/pytorch/torch/distributed/tensor/experimental/ |
H A D | _attention.py | 472 philox_offset: torch.Tensor, 493 philox_offset=philox_offset, 508 philox_offset: torch.Tensor, 527 philox_offset=philox_offset,
|
/aosp_15_r20/external/pytorch/aten/src/ATen/cuda/ |
H A D | CUDAGeneratorImpl.cpp | 366 int64_t philox_offset = 0; in set_state() local 368 memcpy(&philox_offset, new_rng_state + seed_size, offset_size); in set_state() 370 this->set_philox_offset_per_thread(static_cast<uint64_t>(philox_offset)); in set_state()
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/transformers/cuda/flash_attn/ |
H A D | flash_api.h | 64 const at::Tensor philox_offset); 89 const at::Tensor philox_offset);
|
H A D | flash_api.cpp | 824 const at::Tensor philox_offset) { in mha_bwd() argument 990 …philox_args = at::PhiloxCudaState(*philox_seed.data_ptr<int64_t>(), *philox_offset.data_ptr<int64_… in mha_bwd() 993 philox_seed.data_ptr<int64_t>(), philox_offset.data_ptr<int64_t>(), 0); in mha_bwd() 1040 const at::Tensor philox_offset) in mha_varlen_bwd() argument 1224 …philox_args = at::PhiloxCudaState(*philox_seed.data_ptr<int64_t>(), *philox_offset.data_ptr<int64_… in mha_varlen_bwd() 1227 philox_seed.data_ptr<int64_t>(), philox_offset.data_ptr<int64_t>(), 0); in mha_varlen_bwd()
|
/aosp_15_r20/external/pytorch/torch/csrc/inductor/aoti_torch/generated/ |
H A D | c_shim_cuda.h | 22 …exp, double dropout_p, AtenTensorHandle philox_seed, AtenTensorHandle philox_offset, int64_t custo… 31 …_p, int32_t is_causal, AtenTensorHandle philox_seed, AtenTensorHandle philox_offset, double* scale… 38 …ensorHandle logsumexp, AtenTensorHandle philox_seed, AtenTensorHandle philox_offset, AtenTensorHan… 40 …ensorHandle logsumexp, AtenTensorHandle philox_seed, AtenTensorHandle philox_offset, double dropou… 42 …_p, int32_t is_causal, AtenTensorHandle philox_seed, AtenTensorHandle philox_offset, double* scale…
|
/aosp_15_r20/external/pytorch/aten/src/ATen/native/transformers/hip/flash_attn/ |
H A D | flash_api.hip | 308 const at::Tensor philox_offset) { 416 …philox_args = at::PhiloxCudaState(*philox_seed.data_ptr<int64_t>(), *philox_offset.data_ptr<int64_… 418 …philox_args = at::PhiloxCudaState(philox_seed.data_ptr<int64_t>(), philox_offset.data_ptr<int64_t>… 457 mk_aoscalartensor(philox_offset), 496 const at::Tensor philox_offset) {
|
/aosp_15_r20/external/pytorch/test/cpp_extensions/ |
H A D | open_registration_extension.cpp | 467 auto philox_offset = at::empty({}, at::dtype(at::kLong)); in custom_scaled_dot_product_fused_attention_overrideable() local 469 …at::Tensor(), at::Tensor(), max_seqlen_q, max_seqlen_kv, philox_seed, philox_offset, debug_attn_ma… in custom_scaled_dot_product_fused_attention_overrideable() 488 const at::Tensor & philox_offset, in custom_scaled_dot_product_fused_attention_overrideable_backward() argument
|
/aosp_15_r20/external/pytorch/torch/nested/_internal/ |
H A D | sdpa.py | 682 philox_offset,
|
/aosp_15_r20/external/pytorch/torch/ |
H A D | _meta_registrations.py | 5163 philox_offset: Tensor, 5308 philox_offset: Tensor, 5367 philox_offset: Tensor, 5467 philox_offset: Tensor, 5550 philox_offset: Tensor,
|