from datetime import timedelta
from typing import Optional

from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT


__all__ = ["default_pg_timeout", "default_pg_nccl_timeout"]

# Default process-group-wide timeout, if applicable.
# This only applies to the non-NCCL backends.
# To make an attempt at backwards compatibility with THD, we use an
# extraordinarily high default timeout, given that THD did not have timeouts.
default_pg_timeout: timedelta = _DEFAULT_PG_TIMEOUT
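#
# Illustrative usage (a sketch for documentation purposes, not part of the original
# module): with `import torch.distributed as dist`, init_process_group() falls back
# to this constant when no explicit timeout is passed for a non-NCCL backend, so
#     dist.init_process_group(backend="gloo")
# is expected to behave the same as
#     dist.init_process_group(backend="gloo", timeout=default_pg_timeout)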
# Separate timeout for PGNCCL, mainly because it has always been that way in the C++ layer, but until
# recently there was a single default that applied across all backends in the Python layer.
# Later, we could consider merging them back together at the C++ layer if we can align on the same value.
# Note that the NCCL timeout is only enforced if TORCH_NCCL_BLOCKING_WAIT or
# TORCH_NCCL_ASYNC_ERROR_HANDLING is set to 1.

try:
    from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT

    default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT
except ImportError:
    # If C++ NCCL support is not compiled, we don't have access to the default NCCL value.
    # If anyone is actually trying to use NCCL in this state, it should error.
    default_pg_nccl_timeout = None
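

# Illustrative sketch (assumptions: a Gloo-capable host and a free local port; this
# block is not part of the original module and only runs when the file is executed
# directly, never on import). It shows how the constants above feed into
# init_process_group(): omitting `timeout=` uses default_pg_timeout for non-NCCL
# backends and default_pg_nccl_timeout for NCCL, and the NCCL timeout is only
# enforced under TORCH_NCCL_BLOCKING_WAIT or TORCH_NCCL_ASYNC_ERROR_HANDLING.
if __name__ == "__main__":
    import os

    import torch.distributed as dist

    print("default_pg_timeout      =", default_pg_timeout)
    print("default_pg_nccl_timeout =", default_pg_nccl_timeout)

    # Single-process group over the env:// rendezvous, purely to exercise the
    # timeout plumbing; passing the constant explicitly matches the implicit default.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group(
        backend="gloo",
        rank=0,
        world_size=1,
        timeout=default_pg_timeout,
    )
    dist.destroy_process_group()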