From 6da0e7f84b7802a1e8dfcb67a656fdda0403b88c Mon Sep 17 00:00:00 2001 From: cyy Date: Fri, 26 Jan 2024 13:33:24 +0000 Subject: [PATCH] [Clang-tidy header][17/N] Apply clang-tidy on headers in torch/csrc/cuda (#117829) Pull Request resolved: https://github.com/pytorch/pytorch/pull/117829 Approved by: https://github.com/albanD --- torch/csrc/cuda/CUDAPluggableAllocator.cpp | 1 + torch/csrc/cuda/Module.cpp | 19 ++++++------ torch/csrc/cuda/Module.h | 1 - torch/csrc/cuda/Stream.h | 1 + torch/csrc/cuda/comm.cpp | 19 +++++++----- torch/csrc/cuda/device_set.h | 1 + torch/csrc/cuda/memory_snapshot.cpp | 4 +-- torch/csrc/cuda/memory_snapshot.h | 2 +- torch/csrc/cuda/python_nccl.cpp | 34 ++++++++++++---------- torch/csrc/utils/pybind.h | 3 -- 10 files changed, 45 insertions(+), 40 deletions(-) diff --git a/torch/csrc/cuda/CUDAPluggableAllocator.cpp b/torch/csrc/cuda/CUDAPluggableAllocator.cpp index 34e00674ed8..4a4d8de7bcf 100644 --- a/torch/csrc/cuda/CUDAPluggableAllocator.cpp +++ b/torch/csrc/cuda/CUDAPluggableAllocator.cpp @@ -100,6 +100,7 @@ c10::DataPtr CUDAPluggableAllocator::allocate(size_t size) const { cudaStream_t stream = c10::cuda::getCurrentCUDAStream(static_cast(device)); void* r = + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) const_cast(this)->malloc(size, device, stream); c10::DataPtr data_ptr = { r, diff --git a/torch/csrc/cuda/Module.cpp b/torch/csrc/cuda/Module.cpp index 08a6137633e..0980f78223e 100644 --- a/torch/csrc/cuda/Module.cpp +++ b/torch/csrc/cuda/Module.cpp @@ -80,17 +80,13 @@ static void poison_fork() { // CUDA management methods //////////////////////////////////////////////////////////////////////////////// -void THCPModule_setDevice(int device) { - c10::cuda::set_device(static_cast(device)); -} - PyObject* THCPModule_setDevice_wrap(PyObject* self, PyObject* arg) { HANDLE_TH_ERRORS TORCH_CHECK(THPUtils_checkLong(arg), "invalid argument to setDevice"); - int64_t device = THPUtils_unpackLong(arg); + auto device = THPUtils_unpackLong(arg); torch::utils::cuda_lazy_init(); - THCPModule_setDevice(device); + c10::cuda::set_device(static_cast(device)); Py_RETURN_NONE; END_HANDLE_TH_ERRORS @@ -259,6 +255,7 @@ PyObject* THCPModule_setStream_wrap( args, kwargs, "|LLL", + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) const_cast(kwlist), &stream_id, &device_index, @@ -266,11 +263,13 @@ PyObject* THCPModule_setStream_wrap( } auto stream = at::cuda::CUDAStream::unpack3( - stream_id, device_index, static_cast(device_type)); + stream_id, + static_cast(device_index), + static_cast(device_type)); auto device = c10::cuda::current_device(); if (device != stream.device_index()) { - THCPModule_setDevice(stream.device_index()); + c10::cuda::set_device(stream.device_index()); } at::cuda::setCurrentCUDAStream(stream); Py_RETURN_NONE; @@ -926,7 +925,7 @@ static void registerCudaDeviceProperties(PyObject* module) { static_cast, c10::optional, - std::string, + const std::string&, size_t)>(torch::cuda::_record_memory_history)); m.def("_cuda_isHistoryEnabled", []() { @@ -1211,7 +1210,7 @@ static void registerCudaPluggableAllocator(PyObject* module) { } } auto delta = c10::cuda::CUDACachingAllocator::setCheckpointPoolState( - device, pps); + device, std::move(pps)); auto& freed_pointers = delta.ptrs_freed; std::unordered_set allocd_set; diff --git a/torch/csrc/cuda/Module.h b/torch/csrc/cuda/Module.h index 23d9079f146..0c89e4bc65f 100644 --- a/torch/csrc/cuda/Module.h +++ b/torch/csrc/cuda/Module.h @@ -1,7 +1,6 @@ #ifndef THCP_CUDA_MODULE_INC #define THCP_CUDA_MODULE_INC -void THCPModule_setDevice(int idx); PyObject* THCPModule_getDevice_wrap(PyObject* self); PyObject* THCPModule_setDevice_wrap(PyObject* self, PyObject* arg); PyObject* THCPModule_getDeviceName_wrap(PyObject* self, PyObject* arg); diff --git a/torch/csrc/cuda/Stream.h b/torch/csrc/cuda/Stream.h index 6175ac2ea03..9b7197d7439 100644 --- a/torch/csrc/cuda/Stream.h +++ b/torch/csrc/cuda/Stream.h @@ -5,6 +5,7 @@ #include #include +// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) struct THCPStream : THPStream { at::cuda::CUDAStream cuda_stream; }; diff --git a/torch/csrc/cuda/comm.cpp b/torch/csrc/cuda/comm.cpp index 4863f77fd74..c8bbec87cae 100644 --- a/torch/csrc/cuda/comm.cpp +++ b/torch/csrc/cuda/comm.cpp @@ -103,8 +103,9 @@ std::vector broadcast(const Tensor& tensor, IntArrayRef devices) { if (device != tensor.get_device()) { diff_device_dst_tensors.emplace_back(at::empty( tensor.sizes(), - tensor.options().device( - at::Device(DeviceType::CUDA, device)))); // preserve memory format + tensor.options().device(at::Device( + DeviceType::CUDA, + static_cast(device))))); // preserve memory format } } _broadcast_out_impl(tensor, diff_device_dst_tensors); @@ -178,7 +179,7 @@ tensor_list2d broadcast_coalesced( o.reserve(tensors.size()); unique_type_checker type_checker; - at::cuda::CUDAGuard device_guard(devices[0]); + at::cuda::CUDAGuard device_guard(static_cast(devices[0])); for (auto& chunk : torch::utils::take_tensors(tensors, buffer_size)) { auto type_id = chunk.type_id(); type_checker.show(type_id); @@ -189,7 +190,7 @@ tensor_list2d broadcast_coalesced( auto broadcast_values = broadcast(flat_tuple.second, devices); results.reserve(devices.size()); for (size_t i = 1, num_devices = devices.size(); i < num_devices; ++i) { - device_guard.set_index(devices[i]); + device_guard.set_index(static_cast(devices[i])); auto& device_outputs = outputs[i]; auto& inds = broadcast_indices[i]; auto& vals = broadcast_values[i]; @@ -203,7 +204,7 @@ tensor_list2d broadcast_coalesced( auto results = broadcast( torch::utils::flatten_dense_tensors(chunk.tensors), devices); for (size_t i = 1, num_devices = devices.size(); i < num_devices; ++i) { - device_guard.set_index(devices[i]); + device_guard.set_index(static_cast(devices[i])); auto& device_outputs = outputs[i]; for (auto& var : torch::utils::unflatten_dense_tensors(results[i], chunk.tensors)) { @@ -327,10 +328,10 @@ std::vector scatter( chunk_sizes->size()); } dim = at::maybe_wrap_dim(dim, tensor); - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) std::vector chunks = chunk_sizes ? tensor.split_with_sizes(/*split_sizes=*/*chunk_sizes, /*dim=*/dim) - : tensor.chunk(/*chunks=*/devices.size(), /*dim=*/dim); + : tensor.chunk( + /*chunks=*/static_cast(devices.size()), /*dim=*/dim); at::cuda::OptionalCUDAStreamGuard cuda_guard; for (const auto i : c10::irange(chunks.size())) { const auto device_index = static_cast(devices[i]); @@ -494,7 +495,9 @@ at::Tensor gather( at::Device device(DeviceType::CPU); if (!destination_index || *destination_index != -1) { device = at::Device( - DeviceType::CUDA, destination_index ? *destination_index : -1); + DeviceType::CUDA, + destination_index ? static_cast(*destination_index) + : DeviceIndex(-1)); } at::Tensor result = diff --git a/torch/csrc/cuda/device_set.h b/torch/csrc/cuda/device_set.h index 82fa34294d3..acd6f6e6b37 100644 --- a/torch/csrc/cuda/device_set.h +++ b/torch/csrc/cuda/device_set.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace torch { diff --git a/torch/csrc/cuda/memory_snapshot.cpp b/torch/csrc/cuda/memory_snapshot.cpp index 1d6ece35cb1..f1b8e405a40 100644 --- a/torch/csrc/cuda/memory_snapshot.cpp +++ b/torch/csrc/cuda/memory_snapshot.cpp @@ -68,7 +68,7 @@ std::vector ivalue_symbolize( for (const auto& e : t) { l.push_back(all_frames.at(e)); } - py_unique_frames.push_back(std::move(l)); + py_unique_frames.emplace_back(std::move(l)); } std::vector result; @@ -132,7 +132,7 @@ static void checkOptionIn( void _record_memory_history( c10::optional enabled, c10::optional context, - std::string stacks, + const std::string& stacks, size_t max_entries) { if (enabled) { checkOptionIn( diff --git a/torch/csrc/cuda/memory_snapshot.h b/torch/csrc/cuda/memory_snapshot.h index dfffbcb5a88..db39553cb7b 100644 --- a/torch/csrc/cuda/memory_snapshot.h +++ b/torch/csrc/cuda/memory_snapshot.h @@ -18,7 +18,7 @@ TORCH_CUDA_CU_API void _record_memory_history( TORCH_CUDA_CU_API void _record_memory_history( c10::optional enabled = "all", c10::optional context = "all", - std::string stacks = "all", + const std::string& stacks = "all", size_t max_entries = UINT64_MAX); TORCH_CUDA_CU_API std::string _memory_snapshot_pickled(); diff --git a/torch/csrc/cuda/python_nccl.cpp b/torch/csrc/cuda/python_nccl.cpp index 2bd497e4721..db6f6c68070 100644 --- a/torch/csrc/cuda/python_nccl.cpp +++ b/torch/csrc/cuda/python_nccl.cpp @@ -21,7 +21,7 @@ using namespace torch::cuda::nccl::detail; static const char* COMM_CAPSULE_NAME = "torch.cuda.nccl.Communicator"; PyObject* THCPModule_nccl_version(PyObject* self, PyObject* args) { - return PyInt_FromLong(version()); + return PyLong_FromUnsignedLongLong(version()); } PyObject* THCPModule_nccl_version_suffix(PyObject* self, PyObject* args) { @@ -99,10 +99,10 @@ static std::vector unpack_comms(PyObject* obj, size_t size) { PyObject* THCPModule_nccl_init_rank(PyObject* self, PyObject* args) { HANDLE_TH_ERRORS - int nranks; - const char* id; - Py_ssize_t id_len; - int rank; + int nranks = 0; + const char* id = nullptr; + Py_ssize_t id_len = 0; + int rank = 0; if (!PyArg_ParseTuple( args, "is#i:nccl_init_rank", &nranks, &id, &id_len, &rank)) { @@ -118,7 +118,7 @@ PyObject* THCPModule_nccl_init_rank(PyObject* self, PyObject* args) { ncclUniqueId commId; memcpy(&commId, id, NCCL_UNIQUE_ID_BYTES); - ncclComm_t comm; + ncclComm_t comm = nullptr; { pybind11::gil_scoped_release no_gil; comm = comm_init_rank(nranks, commId, rank); @@ -129,8 +129,9 @@ PyObject* THCPModule_nccl_init_rank(PyObject* self, PyObject* args) { PyObject* THCPModule_nccl_reduce(PyObject* self, PyObject* args) { HANDLE_TH_ERRORS - PyObject *_inputs, *_output, *_streams, *_comms; - int root, op; + PyObject *_inputs = nullptr, *_output = nullptr, *_streams = nullptr, + *_comms = nullptr; + int root = 0, op = 0; if (!PyArg_ParseTuple( args, "OOiiOO", &_inputs, &_output, &root, &op, &_streams, &_comms)) { @@ -161,8 +162,9 @@ PyObject* THCPModule_nccl_reduce(PyObject* self, PyObject* args) { PyObject* THCPModule_nccl_all_reduce(PyObject* self, PyObject* args) { HANDLE_TH_ERRORS - PyObject *_inputs, *_outputs, *_streams, *_comms; - int op; + PyObject *_inputs = nullptr, *_outputs = nullptr, *_streams = nullptr, + *_comms = nullptr; + int op = 0; if (!PyArg_ParseTuple( args, "OOiOO", &_inputs, &_outputs, &op, &_streams, &_comms)) { @@ -193,8 +195,8 @@ PyObject* THCPModule_nccl_all_reduce(PyObject* self, PyObject* args) { PyObject* THCPModule_nccl_broadcast(PyObject* self, PyObject* args) { HANDLE_TH_ERRORS - PyObject *_inputs, *_streams, *_comms; - int root; + PyObject *_inputs = nullptr, *_streams = nullptr, *_comms = nullptr; + int root = 0; if (!PyArg_ParseTuple(args, "OiOO", &_inputs, &root, &_streams, &_comms)) { THPUtils_invalidArguments( @@ -224,7 +226,8 @@ PyObject* THCPModule_nccl_broadcast(PyObject* self, PyObject* args) { PyObject* THCPModule_nccl_all_gather(PyObject* self, PyObject* args) { HANDLE_TH_ERRORS - PyObject *_inputs, *_outputs, *_streams, *_comms; + PyObject *_inputs = nullptr, *_outputs = nullptr, *_streams = nullptr, + *_comms = nullptr; if (!PyArg_ParseTuple( args, "OOOO", &_inputs, &_outputs, &_streams, &_comms)) { @@ -255,8 +258,9 @@ PyObject* THCPModule_nccl_all_gather(PyObject* self, PyObject* args) { PyObject* THCPModule_nccl_reduce_scatter(PyObject* self, PyObject* args) { HANDLE_TH_ERRORS - PyObject *_inputs, *_outputs, *_streams, *_comms; - int op; + PyObject *_inputs = nullptr, *_outputs = nullptr, *_streams = nullptr, + *_comms = nullptr; + int op = 0; if (!PyArg_ParseTuple( args, "OOiOO", &_inputs, &_outputs, &op, &_streams, &_comms)) { diff --git a/torch/csrc/utils/pybind.h b/torch/csrc/utils/pybind.h index e9d95bb4e97..36cb83659aa 100644 --- a/torch/csrc/utils/pybind.h +++ b/torch/csrc/utils/pybind.h @@ -15,9 +15,6 @@ #include #include -#include -#include - namespace py = pybind11; // This makes intrusive_ptr to be available as a custom pybind11 holder type,