mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
`all_to_all_single_autograd` is not an op, so all the code executed up to the `all_to_all_single` dispatch is visible to the compiler. This means the `all_to_all_single_autograd` wrapper code must support symints in order to be traceable with dynamic shapes. Fixes https://github.com/pytorch/pytorch/issues/157479. Pull Request resolved: https://github.com/pytorch/pytorch/pull/157521. Approved by: https://github.com/wconstab
79 lines
2.2 KiB
C++
79 lines
2.2 KiB
C++
#pragma once

#include <torch/csrc/distributed/c10d/ProcessGroup.hpp>

namespace c10d {
|
|
|
|
C10_EXPORT at::Tensor& all_reduce_(
|
|
at::Tensor& input,
|
|
std::string reduce_op,
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor all_reduce(
|
|
const at::Tensor& input,
|
|
std::string reduce_op,
|
|
std::string group_name);
|
|
|
|
C10_EXPORT std::vector<at::Tensor> all_reduce_coalesced_(
|
|
std::vector<at::Tensor> inputs,
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::string reduce_op,
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::string group_name);
|
|
|
|
C10_EXPORT std::vector<at::Tensor> all_reduce_coalesced(
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::vector<at::Tensor> inputs,
|
|
std::string reduce_op,
|
|
std::string group_name);
|
|
|
|
C10_EXPORT std::vector<at::Tensor> all_gather_into_tensor_coalesced(
|
|
std::vector<at::Tensor> inputs,
|
|
int64_t group_size,
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor all_gather_into_tensor(
|
|
const at::Tensor& input,
|
|
int64_t group_size,
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor& all_gather_into_tensor_out(
|
|
at::Tensor& input,
|
|
int64_t group_size,
|
|
const std::string& group_name,
|
|
at::Tensor& output);
|
|
|
|
C10_EXPORT std::vector<at::Tensor> reduce_scatter_tensor_coalesced(
|
|
std::vector<at::Tensor> inputs,
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::string reduce_op,
|
|
int64_t group_size,
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor reduce_scatter_tensor(
|
|
const at::Tensor& input,
|
|
std::string reduce_op,
|
|
int64_t group_size,
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor all_to_all_single(
|
|
const at::Tensor& input,
|
|
at::SymIntArrayRef output_split_sizes,
|
|
at::SymIntArrayRef input_split_sizes,
|
|
// NOLINTNEXTLINE(performance-unnecessary-value-param)
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor& broadcast_(
|
|
at::Tensor& input,
|
|
int64_t src,
|
|
std::string group_name);
|
|
|
|
C10_EXPORT at::Tensor broadcast(
|
|
const at::Tensor& input,
|
|
int64_t src,
|
|
std::string group_name);
|
|
|
|
} // namespace c10d
|