Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-07 12:21:27 +01:00
Summary: There is still some work to be done:
- Move logging and unify AT_WARN with LOG(ERROR).
- A few header files are still being plumbed through and need cleaning.
- caffe2::EnforceNotMet aliasing is not done yet.
- Need to unify the macros; see c10/util/Exception.h.

This is mainly a codemod and does not cause functional changes. If you find your job failing and trace it back to this diff, it can usually be fixed by one of the following approaches:
(1) Add //caffe2/c10:c10 to your dependency (or transitive dependency).
(2) Change objects such as at::Error and at::Optional to the c10 namespace.
(3) Change functions to the c10 namespace. In particular, caffe2::MakeString is not overridden by the unified c10::str function.

Nothing else changes. Please kindly consider not reverting this diff - it involves multiple rounds of rebasing and the fix is usually simple. Contact jiayq@ or AI Platform Dev for details.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/12354
Reviewed By: orionr
Differential Revision: D10238910
Pulled By: Yangqing
fbshipit-source-id: 7794d5bf2797ab0ca6ebaccaa2f7ebbd50ff8f32
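For concreteness, here is a minimal sketch of the kind of caller-side change items (2) and (3) describe. The call site below is hypothetical and not taken from the diff; c10::optional and c10::str are the c10 utilities the summary refers to, and the headers are assumed to be on the include path after this change:

```cpp
#include <c10/util/Optional.h>   // c10::optional, c10::nullopt
#include <c10/util/StringUtil.h> // c10::str

#include <cstdint>
#include <string>

// Hypothetical caller: before the codemod this might have been spelled with
// at::optional; afterwards the type is spelled via the c10 namespace.
std::string describe_device(c10::optional<int64_t> device_index) {
  if (!device_index) {
    return "cpu";
  }
  // c10::str builds a std::string from its arguments.
  return c10::str("cuda:", *device_index);
}
```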
73 lines
2.3 KiB
C++
#include "torch/csrc/utils/pybind.h"
|
|
#include "torch/csrc/cuda/comm.h"
|
|
#include "torch/csrc/cuda/Stream.h"
|
|
#include "torch/csrc/cuda/THCP.h"
|
|
#include "torch/csrc/utils/auto_gil.h"
|
|
#include "torch/csrc/utils/functional.h"
|
|
|
|
#include <ATen/ATen.h>
|
|
|
|
#include <THC/THC.h>
|
|
|
|
#include <cstddef>
|
|
#include <vector>
|
|
|
|
namespace torch { namespace cuda { namespace python {
|
|
void initCommMethods(PyObject *module) {
|
|
auto m = py::cast<py::module>(module);
|
|
m.def(
|
|
"_broadcast_coalesced",
|
|
[](std::vector<at::Tensor>& tensors,
|
|
std::vector<int64_t> devices,
|
|
size_t buffer_size) {
|
|
return broadcast_coalesced(tensors, devices, buffer_size);
|
|
},
|
|
py::arg("tensors"),
|
|
py::arg("devices"),
|
|
py::arg("buffer_size"),
|
|
py::call_guard<py::gil_scoped_release>())
|
|
.def(
|
|
"_broadcast",
|
|
[](at::Tensor& tensor, std::vector<int64_t> devices) {
|
|
return broadcast(tensor, devices);
|
|
},
|
|
py::call_guard<py::gil_scoped_release>())
|
|
.def(
|
|
"_scatter",
|
|
[](at::Tensor& tensor,
|
|
std::vector<int64_t>& devices,
|
|
c10::optional<std::vector<int64_t>> chunk_sizes,
|
|
int64_t dim,
|
|
c10::optional<py::object> py_streams) {
|
|
c10::optional<std::vector<at::cuda::CUDAStream>> streams;
|
|
if (py_streams) {
|
|
py::handle handle = *py_streams;
|
|
streams = fmap(
|
|
THPUtils_PySequence_to_THCStreamList(handle.ptr()),
|
|
[](THCStream* stream) {
|
|
return at::cuda::CUDAStream(stream);
|
|
});
|
|
}
|
|
// Note: We're holding the GIL up to here.
|
|
AutoNoGIL no_gil;
|
|
return scatter(tensor, devices, chunk_sizes, dim, streams);
|
|
},
|
|
py::arg("tensor"),
|
|
py::arg("devices"),
|
|
py::arg("chunk_sizes"),
|
|
py::arg("dim"),
|
|
py::arg("streams"))
|
|
.def(
|
|
"_gather",
|
|
[](std::vector<at::Tensor>& tensors,
|
|
int64_t dim,
|
|
c10::optional<int32_t> destination_index) {
|
|
return gather(tensors, dim, destination_index);
|
|
},
|
|
py::arg("tensors"),
|
|
py::arg("dim"),
|
|
py::arg("destination_index"),
|
|
py::call_guard<py::gil_scoped_release>());
|
|
}
|
|
}}}
|
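One design point worth noting in the file above: _broadcast_coalesced, _broadcast, and _gather release the GIL for the whole call via py::call_guard<py::gil_scoped_release>, while _scatter cannot, because it first has to convert the streams argument (a Python object) under the GIL and only then drops it with AutoNoGIL. A small generic pybind11 sketch of the two patterns, with made-up function names purely for illustration:

```cpp
#include <pybind11/pybind11.h>

#include <cstdint>

namespace py = pybind11;

void add_example_bindings(py::module m) {
  // Pattern A: no Python objects are touched inside the body, so the GIL can
  // be released for the entire call with a call guard.
  m.def(
      "_double_native",
      [](int64_t n) { return 2 * n; },
      py::call_guard<py::gil_scoped_release>());

  // Pattern B: the argument is a Python object, so it must be converted while
  // the GIL is still held; only afterwards is the GIL released for the heavy
  // native work (the same shape as the _scatter binding above).
  m.def("_double_pyobject", [](py::object obj) {
    auto n = obj.cast<int64_t>();   // requires the GIL
    py::gil_scoped_release no_gil;  // released from here to the end of scope
    return 2 * n;
  });
}
```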