from datetime import timedelta
from typing import Optional

from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT


__all__ = ["default_pg_timeout", "default_pg_nccl_timeout"]

# Default process-group-wide timeout, if applicable.
# This only applies to the non-nccl backends.
# To make an attempt at backwards compatibility with THD, we use an
# extraordinarily high default timeout, given that THD did not have timeouts.
default_pg_timeout: timedelta = _DEFAULT_PG_TIMEOUT

# NCCL process groups carry their own default timeout because the C++ layer
# has always kept it separate, whereas the Python layer historically applied a
# single default across all backends. If a common value is ever agreed upon,
# the two defaults could be unified at the C++ layer.
# NOTE: (only if TORCH_NCCL_BLOCKING_WAIT or TORCH_NCCL_ASYNC_ERROR_HANDLING
# is set to 1).
default_pg_nccl_timeout: Optional[timedelta]
try:
    from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT
except ImportError:
    # If C++ NCCL support is not compiled in, there is no default nccl value
    # to expose; any actual attempt to use nccl in this state should error.
    default_pg_nccl_timeout = None
else:
    default_pg_nccl_timeout = _DEFAULT_PG_NCCL_TIMEOUT