diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 83bf15893ca..c7c0e8ad4e9 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1033,7 +1033,7 @@ elseif(USE_CUDA)
     target_compile_definitions(torch_cuda PRIVATE USE_CUSPARSELT)
   endif()
   if(USE_NCCL)
-    target_link_libraries(torch_cuda PRIVATE __caffe2_nccl)
+    target_link_libraries(torch_cuda PRIVATE __caffe2_nccl fmt::fmt-header-only)
     target_compile_definitions(torch_cuda PRIVATE USE_NCCL)
   endif()
   if(USE_UCC)
@@ -1776,7 +1776,7 @@ if(USE_ROCM)
     target_link_libraries(torch_hip PRIVATE ATEN_CUDA_FILES_GEN_LIB)
   endif()
   target_link_libraries(torch_hip PUBLIC torch_cpu_library ${Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS})
-  target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS})
+  target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS} fmt::fmt-header-only)
   # Since PyTorch files contain HIP headers, this is also needed to capture the includes.
   target_include_directories(torch_hip PRIVATE ${Caffe2_HIP_INCLUDE})
diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
index 5c8974836de..b7524da4f46 100644
--- a/test/cpp/c10d/CMakeLists.txt
+++ b/test/cpp/c10d/CMakeLists.txt
@@ -13,6 +13,7 @@ function(c10d_add_test test_src)
   if(NOT WIN32)
     target_link_libraries(${test_name} pthread)
   endif()
+  target_link_libraries(${test_name} fmt::fmt-header-only)
   add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
 endfunction()
@@ -92,4 +93,5 @@ if(LINUX AND USE_GLOO AND USE_C10D_GLOO)
   if(USE_CUDA)
     target_link_libraries(example_allreduce torch_cuda)
   endif()
+  target_link_libraries(example_allreduce fmt::fmt-header-only)
 endif()
diff --git a/test/cpp/rpc/CMakeLists.txt b/test/cpp/rpc/CMakeLists.txt
index 6834b428ff9..a430291f9d8 100644
--- a/test/cpp/rpc/CMakeLists.txt
+++ b/test/cpp/rpc/CMakeLists.txt
@@ -5,7 +5,7 @@ set(TORCH_RPC_TEST_SOURCES
   ${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp
 )
 set(TORCH_RPC_TEST_DEPENDENCY_LIBS
-  torch gtest
+  torch gtest fmt::fmt-header-only
 )

 if(USE_GLOO)
diff --git a/torch/csrc/distributed/c10d/Backend.hpp b/torch/csrc/distributed/c10d/Backend.hpp
index c9e8aec4397..05a39ddc905 100644
--- a/torch/csrc/distributed/c10d/Backend.hpp
+++ b/torch/csrc/distributed/c10d/Backend.hpp
@@ -33,6 +33,7 @@ class TORCH_API Backend : public torch::CustomClassHolder {
     std::chrono::milliseconds timeout;

     // backend name
+    // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
     const std::string backend;
   };

@@ -397,7 +398,9 @@ class TORCH_API Backend : public torch::CustomClassHolder {
   // appropriate logging etc.
   void init();

+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const int rank_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const int size_;
   // Debug level setting. It is parsed once when ProcessGroup is constructed and
   // remains the same across use of this process group.
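Note: the NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) lines above suppress, rather than fix, the clang-tidy warning: a const data member implicitly deletes the class's copy and move assignment operators, and Backend deliberately keeps backend, rank_, and size_ immutable. A minimal standalone sketch of what the check warns about (hypothetical OptionsLike type, not code from this patch):

#include <chrono>
#include <string>

struct OptionsLike {
  std::chrono::milliseconds timeout{};
  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
  const std::string backend; // const member: assignment operators are implicitly deleted
};

int main() {
  OptionsLike a{std::chrono::seconds(30), "gloo"};
  OptionsLike b{std::chrono::seconds(60), "nccl"};
  OptionsLike c = a; // copy construction still works
  // a = b;          // would not compile: copy assignment is deleted by the const member
  (void)b;
  (void)c;
  return 0;
}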
diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp
index dcb6d155478..8c805020e8c 100644
--- a/torch/csrc/distributed/c10d/ProcessGroup.hpp
+++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp
@@ -59,10 +59,11 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
     std::chrono::milliseconds timeout;

     // backend name
+    // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
     const std::string backend;
   };

-  enum BackendType {
+  enum BackendType : uint8_t {
     UNDEFINED = 0,
     GLOO = 1,
     NCCL = 2,
@@ -719,9 +720,13 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
   void init();

   c10::intrusive_ptr<Store> store_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const int rank_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const int size_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const c10::intrusive_ptr<Options> options_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const BackendType backendType_;
   std::string pg_desc_;
diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
index e95191436b8..ada56cbee19 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
@@ -975,7 +975,8 @@ c10::intrusive_ptr<Work> ProcessGroupGloo::broadcast(
   };

   assertRootRank(invalidArgument, opts.rootRank, size_);
-  assertRootTensor(invalidArgument, opts.rootTensor, inputs.size());
+  assertRootTensor(
+      invalidArgument, opts.rootTensor, static_cast<int64_t>(inputs.size()));
   assertDense(invalidArgument, inputs);
   assertTypeAndSizesMatch(invalidArgument, inputs);

@@ -1300,7 +1301,9 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
     // Allgatherv indices.
     gloo::AllgathervOptions opts(context);
     opts.setInput(
-        const_cast<int64_t*>(input.const_data_ptr<int64_t>()), input.numel());
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
+        const_cast<int64_t*>(input.const_data_ptr<int64_t>()),
+        input.numel());
     opts.setOutput(output.mutable_data_ptr<int64_t>(), counts);
     opts.setTag(tag);
     gloo::allgatherv(opts);
@@ -1308,7 +1311,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
     // Compile indices tensor per rank.
     std::vector<at::Tensor> indices;
     indices.reserve(metadata.size());
-    size_t offset = 0;
+    int64_t offset = 0;
     for (const auto& i : metadata) {
       const auto nnz = i.nnz();
       const auto numel = sparseDim * nnz;
@@ -1325,7 +1328,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
       const std::vector<SparseTensorMetadata>& metadata) {
     // There are nnz #dense_dim()-dimensional tensors per rank.
     const auto valueShape = tensor.sizes().slice(tensor.sparse_dim());
-    size_t denseNumel = 1;
+    int64_t denseNumel = 1;
     for (auto dim : valueShape) {
       denseNumel *= dim;
     }
@@ -1334,7 +1337,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
     int64_t totalSize = 0;
     for (const auto i : c10::irange(metadata.size())) {
       counts[i] = metadata[i].nnz() * denseNumel;
-      totalSize += counts[i];
+      totalSize += static_cast<int64_t>(counts[i]);
     }

     auto output = at::empty({totalSize}, tensor.scalar_type());
@@ -1353,7 +1356,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
     // Compile values tensor per rank.
     std::vector<at::Tensor> values;
     values.reserve(metadata.size());
-    size_t offset = 0;
+    int64_t offset = 0;
     for (const auto& i : metadata) {
       const auto nnz = i.nnz();
       const auto numel = denseNumel * nnz;
@@ -1740,7 +1743,8 @@ c10::intrusive_ptr<Work> ProcessGroupGloo::reduce(
   };

   assertRootRank(invalidArgument, opts.rootRank, size_);
-  assertRootTensor(invalidArgument, opts.rootTensor, inputs.size());
+  assertRootTensor(
+      invalidArgument, opts.rootTensor, static_cast<int64_t>(inputs.size()));
   assertSingleElement(invalidArgument, inputs);
   assertDense(invalidArgument, inputs);
@@ -1832,7 +1836,7 @@ class AsyncAllgatherWork : public ProcessGroupGloo::AsyncWork {
     // Unflatten into output tensors.
     for (auto& outputgroup : outputs) {
       for (const auto j : c10::irange(outputgroup.size())) {
-        outputgroup[j].copy_(flatOutputTensor[j]);
+        outputgroup[j].copy_(flatOutputTensor[static_cast<int64_t>(j)]);
       }
     }
   }
@@ -2102,7 +2106,7 @@ class AsyncAllgatherCoalescedWork : public ProcessGroupGloo::AsyncWork {
     for (const auto& t : output_lists[0]) {
       output_numel += t.numel();
     }
-    output_numel *= output_lists.size();
+    output_numel *= static_cast<int64_t>(output_lists.size());
     // Use single flat output tensor.
     at::Tensor flatOutputTensor =
         at::empty({output_numel}, output_lists[0][0].options());
@@ -2251,7 +2255,7 @@ class AsyncGatherWork : public ProcessGroupGloo::AsyncWork {
     // Unflatten into output tensors on root process.
     if (context->rank == root) {
       for (const auto i : c10::irange(outputs[0].size())) {
-        outputs[0][i].copy_(flatOutputTensor[i]);
+        outputs[0][i].copy_(flatOutputTensor[static_cast<int64_t>(i)]);
       }
     }
   }
@@ -2805,6 +2809,7 @@ c10::intrusive_ptr<Work> ProcessGroupGloo::send(

   // Construct unbound buffer.
   auto context = getContext(tag);
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
   auto buf = context->createUnboundBuffer(const_cast<void*>(ptr), size);
   buf->send(dstRank, utag);
   ++seq_;
@@ -2945,8 +2950,8 @@ void ProcessGroupGloo::monitoredBarrier(
   // only enforce timeout on rank 0. This is so that other ranks aren't timed
   // out first, bringing down the job without reporting which rank timed out.
   if (rank != 0) {
-    auto sendWork = send(commTensor, 0, t1);
-    auto recvWork = recv(commTensor, 0, t2);
+    auto sendWork = send(commTensor, 0, static_cast<int>(t1));
+    auto recvWork = recv(commTensor, 0, static_cast<int>(t2));
     try {
       sendWork->wait();
       recvWork->wait();
@@ -2970,7 +2975,8 @@ void ProcessGroupGloo::monitoredBarrier(
   // Failed/hanging ranks will not ack this call, letting rank 0 know about the
   // failure.
   for (const auto dstRank : c10::irange(1, worldSize)) {
-    recvWorkMap.insert({dstRank, recv(commTensor, dstRank, t1)});
+    recvWorkMap.emplace(
+        dstRank, recv(commTensor, dstRank, static_cast<int>(t1)));
   }

   auto waitLoop = [&](const std::map<int, c10::intrusive_ptr<Work>>& works) {
@@ -3042,7 +3048,8 @@ void ProcessGroupGloo::monitoredBarrier(
   // ensures that this is a true barrier in that all ranks exit it successfully
   // or none of them do.
   for (const auto dstRank : c10::irange(1, worldSize)) {
-    sendWorkMap.insert({dstRank, send(commTensor, dstRank, t2)});
+    sendWorkMap.emplace(
+        dstRank, send(commTensor, dstRank, static_cast<int>(t2)));
   }

   waitLoop(sendWorkMap);
diff --git a/torch/csrc/distributed/c10d/ProcessGroupMPI.cpp b/torch/csrc/distributed/c10d/ProcessGroupMPI.cpp
index 94d7cd9cca0..29d05a9693b 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupMPI.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupMPI.cpp
@@ -514,7 +514,7 @@ c10::intrusive_ptr<Work> ProcessGroupMPI::allgather(
         pgComm_));

     for (const auto i : c10::irange(outputDataVec.size())) {
-      outputDataVec[i].copy_(flatOutputTensor[i]);
+      outputDataVec[i].copy_(flatOutputTensor[static_cast<int64_t>(i)]);
     }
   };
   auto entry = std::make_unique<WorkEntry>(
@@ -586,7 +586,8 @@ c10::intrusive_ptr<Work> ProcessGroupMPI::gather(
         const std::vector<at::Tensor>& outputDataVec = entry->dst;
         // copy the flattened output tensors to the outputs
         for (const auto i : c10::irange(outputDataVec.size())) {
-          outputDataVec.at(i).copy_(flatOutputTensor[i]);
+          outputDataVec.at(i).copy_(
+              flatOutputTensor[static_cast<int64_t>(i)]);
         }
       }
     };
@@ -647,7 +648,7 @@ c10::intrusive_ptr<Work> ProcessGroupMPI::scatter(

       // copy the input tensors to the flatten large send buffer
       for (const auto i : c10::irange(inputDataVec.size())) {
-        flatInputTensor[i].copy_(inputDataVec.at(i));
+        flatInputTensor[static_cast<int64_t>(i)].copy_(inputDataVec.at(i));
       }
     }
@@ -793,16 +794,18 @@ c10::intrusive_ptr<Work> ProcessGroupMPI::alltoall(
        std::vector<int> recv_offsets(size_);
        auto srcdata = entry->src;
        auto dstdata = entry->dst;
-       int64_t src_len = c10d::computeLengthsAndOffsets(
+       auto src_len = c10d::computeLengthsAndOffsets(
            srcdata, &send_lengths, &send_offsets);
-       int64_t dst_len = c10d::computeLengthsAndOffsets(
+       auto dst_len = c10d::computeLengthsAndOffsets(
            dstdata, &recv_lengths, &recv_offsets);
        std::vector<int64_t> send_lengthsL(
            send_lengths.begin(), send_lengths.end());
        std::vector<int64_t> recv_lengthsL(
            recv_lengths.begin(), recv_lengths.end());
-       at::Tensor srcFlatData = at::empty({src_len}, srcdata[0].options());
-       at::Tensor dstFlatData = at::empty({dst_len}, dstdata[0].options());
+       at::Tensor srcFlatData =
+           at::empty({static_cast<int64_t>(src_len)}, srcdata[0].options());
+       at::Tensor dstFlatData =
+           at::empty({static_cast<int64_t>(dst_len)}, dstdata[0].options());
        auto srcFlatDataSplits =
            srcFlatData.split_with_sizes(c10::IntArrayRef(send_lengthsL), 0);
        for (const auto i : c10::irange(size_)) {
diff --git a/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp b/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp
index a6086d28e91..e1b99009385 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp
@@ -31,12 +31,12 @@ struct CollectiveFingerPrint {
   std::vector<int8_t> tensor_device_types_;
   // input tensor sizes
   std::vector<std::vector<int64_t>> tensor_sizes_;
-  int sequence_number_;
+  uint64_t sequence_number_;

   CollectiveFingerPrint(
       OpType op_type,
       const std::vector<at::Tensor>& input_tensors,
-      int sequence_number)
+      uint64_t sequence_number)
       : op_type_(op_type),
         num_tensors_(input_tensors.size()),
         sequence_number_(sequence_number) {
@@ -57,7 +57,7 @@ struct CollectiveFingerPrint {
       std::vector<int8_t> tensor_dtypes,
       std::vector<int8_t> tensor_device_types,
       std::vector<std::vector<int64_t>> tensor_sizes,
-      int sequence_number)
+      uint64_t sequence_number)
       : op_type_(op_type),
         num_tensors_(num_tensors),
         tensor_dtypes_(std::move(tensor_dtypes)),
@@ -296,7 +296,7 @@ struct CollectiveFingerPrint {
     // 1. OpType
     data->push_back(static_cast<int64_t>(op_type_));
     // sequence number
-    data->push_back(sequence_number_);
+    data->push_back(static_cast<int64_t>(sequence_number_));
     // 2. Num tensors
     data->push_back(static_cast<int64_t>(num_tensors_));
     // 3. Tensor dtypes
@@ -309,13 +309,13 @@ struct CollectiveFingerPrint {
     }
     // 5. Shapes
     for (const auto& sizes : tensor_sizes_) {
-      data->push_back(sizes.size());
+      data->push_back(static_cast<int64_t>(sizes.size()));
       for (const auto& s : sizes) {
         data->push_back(s);
       }
     }
     // Serialize data into tensor
-    int64_t data_size = data->size();
+    int64_t data_size = static_cast<int64_t>(data->size());
     // Need to release here and get the ptr due to C++ parameter evaluation
     // order.
     auto d = data.release();
diff --git a/torch/csrc/distributed/c10d/PyProcessGroup.hpp b/torch/csrc/distributed/c10d/PyProcessGroup.hpp
index 684af0c2ece..1b1d307ea82 100644
--- a/torch/csrc/distributed/c10d/PyProcessGroup.hpp
+++ b/torch/csrc/distributed/c10d/PyProcessGroup.hpp
@@ -207,7 +207,7 @@ class TORCH_PYTHON_API PythonOnCompletionHook {
     hook_.ptr() = nullptr;
   }

-  void operator()(std::shared_ptr<WorkInfo> workInfo) const {
+  void operator()(const std::shared_ptr<WorkInfo>& workInfo) const {
     std::exception_ptr eptr;
     {
       py::gil_scoped_acquire acquire;
diff --git a/torch/csrc/distributed/c10d/TCPStore.hpp b/torch/csrc/distributed/c10d/TCPStore.hpp
index 91ed8952661..03a7f124ca7 100644
--- a/torch/csrc/distributed/c10d/TCPStore.hpp
+++ b/torch/csrc/distributed/c10d/TCPStore.hpp
@@ -30,10 +30,10 @@ class Counter {
     return count_;
   }
   double variance() const noexcept {
-    return m2_ / count_;
+    return m2_ / static_cast<double>(count_);
   }
   double sample_variance() const noexcept {
-    return m2_ / (count_ - 1);
+    return m2_ / static_cast<double>(count_ - 1);
   }

 private:
diff --git a/torch/csrc/distributed/c10d/Utils.cpp b/torch/csrc/distributed/c10d/Utils.cpp
index 170ed8f8b50..c35c99d0081 100644
--- a/torch/csrc/distributed/c10d/Utils.cpp
+++ b/torch/csrc/distributed/c10d/Utils.cpp
@@ -1,10 +1,6 @@
 #include <torch/csrc/distributed/c10d/Utils.hpp>
-#include
 #include
-#include
-#include
-#include

 namespace c10d {
diff --git a/torch/csrc/distributed/c10d/Utils.hpp b/torch/csrc/distributed/c10d/Utils.hpp
index 36add3ad150..f0dfd562ddc 100644
--- a/torch/csrc/distributed/c10d/Utils.hpp
+++ b/torch/csrc/distributed/c10d/Utils.hpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <fmt/format.h>
 #include

 #ifdef _WIN32
@@ -66,7 +67,7 @@ inline void assertSameType(
       const std::string expected = type.toString();
       const std::string actual = tensors[i].toString();
       throw std::invalid_argument(
-          "mixed types (" + expected + " and " + actual + ")");
+          fmt::format("mixed types ({} and {})", expected, actual));
     }
   }
 }
@@ -96,7 +97,7 @@ inline std::string getCvarString(
   /* parse environment variable in reverse order, so the early
    * versions of a variable get higher priority than the latter
    * versions of the same variable */
-  for (int i = env.size() - 1; i >= 0; i--) {
+  for (ssize_t i = static_cast<ssize_t>(env.size()) - 1; i >= 0; i--) {
     const char* val = std::getenv(env[i].c_str());
     if (val == nullptr) {
       continue;
@@ -123,7 +124,7 @@ inline int getCvarInt(const std::vector<std::string>& env, int def) {
   /* parse environment variable in reverse order, so the early
    * versions of a variable get higher priority than the latter
    * versions of the same variable */
-  for (int i = env.size() - 1; i >= 0; i--) {
+  for (ssize_t i = static_cast<ssize_t>(env.size()) - 1; i >= 0; i--) {
     char* val = std::getenv(env[i].c_str());
     if (val == nullptr) {
       continue;
@@ -154,7 +155,7 @@ inline bool getCvarBool(const std::vector<std::string>& env, bool def) {
   /* parse environment variable in reverse order, so the early
    * versions of a variable get higher priority than the latter
    * versions of the same variable */
-  for (int i = env.size() - 1; i >= 0; i--) {
+  for (ssize_t i = static_cast<ssize_t>(env.size()) - 1; i >= 0; i--) {
     char* val_ = std::getenv(env[i].c_str());
     if (val_ == nullptr) {
       continue;
@@ -166,6 +167,7 @@ inline bool getCvarBool(const std::vector<std::string>& env, bool def) {

   std::string val = std::string(val_);
   for (auto& x : val) {
+    // NOLINTNEXTLINE(*-narrowing-conversions)
     x = std::tolower(x);
   }
@@ -193,7 +195,7 @@ inline void assertSameSizes(
       const auto expected = toString(sizes);
       const auto actual = toString(tensors[i].sizes());
       throw std::invalid_argument(
-          "mixed sizes (" + expected + " and " + actual + ")");
+          fmt::format("mixed sizes ({} and {})", expected, actual));
     }
   }
 }
@@ -211,22 +213,20 @@ inline void assertSameSizeAndType(const std::vector<at::Tensor>& tensors) {
     if (!tensors[i].options().type_equal(options)) {
       const auto expected = toString(options);
       const auto actual = toString(tensors[i].options());
-      throw std::invalid_argument(
-          "argument contains mixed types (" + expected + " and " + actual +
-          ")");
+      throw std::invalid_argument(fmt::format(
+          "argument contains mixed types ({} and {})", expected, actual));
     }
     if (!tensors[i].sizes().equals(sizes)) {
       const auto expected = toString(sizes);
       const auto actual = toString(tensors[i].sizes());
-      throw std::invalid_argument(
-          "argument contains mixed sizes (" + expected + " and " + actual +
-          ")");
+      throw std::invalid_argument(fmt::format(
+          "argument contains mixed sizes ({} and {})", expected, actual));
     }
   }
 }

 inline void assertTypeMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::DeprecatedTypeProperties& type,
     const at::ArrayRef<at::Tensor> tensors,
     size_t index) {
@@ -237,7 +237,7 @@ inline void assertTypeMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::TensorOptions& options,
     const at::ArrayRef<at::Tensor> tensors,
     size_t index) {
@@ -248,7 +248,7 @@ inline void assertSizesMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::IntArrayRef& sizes,
     const at::ArrayRef<at::Tensor> tensors,
     size_t index) {
@@ -259,7 +259,7 @@ inline void assertLayoutMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const c10::Layout& expected,
     const at::ArrayRef<at::Tensor> tensors,
     size_t index) {
@@ -271,7 +271,7 @@ inline void assertLayoutMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::ArrayRef<at::Tensor> tensors) {
   const auto& layout = tensors[0].layout();
   for (const auto i : c10::irange(1, tensors.size())) {
@@ -362,7 +362,7 @@ inline void assertSameDevice(
 }

 inline void assertTypeAndSizesMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::ArrayRef<at::Tensor> tensors,
     const at::DeprecatedTypeProperties& type,
     const at::IntArrayRef& sizes) {
@@ -373,7 +373,7 @@ inline void assertTypeAndSizesMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::ArrayRef<at::Tensor> tensors,
     const at::TensorOptions& options,
     const at::IntArrayRef& sizes) {
@@ -384,7 +384,7 @@ inline void assertTypeAndSizesMatch(
-    std::function<void(const std::string&)> fn,
+    const std::function<void(const std::string&)>& fn,
     const at::ArrayRef<at::Tensor> tensors) {
   const auto& options = tensors[0].options();
   const auto sizes = tensors[0].sizes();
@@ -463,6 +463,7 @@ inline std::vector<int> getDevices(const std::vector<at::Tensor>& tensors) {
   std::vector<int> devices(tensors.size(), -1);
   if (tensors[0].device().is_cuda()) {
     for (const auto i : c10::irange(tensors.size())) {
+      // NOLINTNEXTLINE(bugprone-signed-char-misuse)
       devices[i] = tensors[i].storage().device().index();
     }
   }
@@ -620,8 +621,7 @@ void sendBytes(
     return;
   }

-  auto bytes = reinterpret_cast<const uint8_t*>(buffer);
-  uint8_t* currentBytes = const_cast<uint8_t*>(bytes);
+  auto currentBytes = reinterpret_cast<const char*>(buffer);

   int flags = 0;
@@ -637,10 +637,9 @@ void sendBytes(
 #endif

   while (bytesToSend > 0) {
-    ssize_t bytesSent;
+    ssize_t bytesSent = 0;
     SYSCHECK_ERR_RETURN_NEG1(
-        bytesSent =
-            ::send(socket, (const char*)currentBytes, bytesToSend, flags))
+        bytesSent = ::send(socket, currentBytes, bytesToSend, flags))
     if (bytesSent == 0) {
       C10_THROW_ERROR(DistNetworkError, std::strerror(ECONNRESET));
     }
@@ -657,13 +656,12 @@ void recvBytes(int socket, T* buffer, size_t length) {
     return;
   }

-  auto bytes = reinterpret_cast<uint8_t*>(buffer);
-  uint8_t* currentBytes = bytes;
+  auto currentBytes = reinterpret_cast<char*>(buffer);

   while (bytesToReceive > 0) {
-    ssize_t bytesReceived;
+    ssize_t bytesReceived = 0;
     SYSCHECK_ERR_RETURN_NEG1(
-        bytesReceived = recv(socket, (char*)currentBytes, bytesToReceive, 0))
+        bytesReceived = recv(socket, currentBytes, bytesToReceive, 0))
     if (bytesReceived == 0) {
       C10_THROW_ERROR(DistNetworkError, std::strerror(ECONNRESET));
     }
@@ -684,7 +682,7 @@ void sendVector(int socket, const std::vector<T>& vec, bool moreData = false) {
 // receive a vector as sent in sendVector
 template <typename T>
 std::vector<T> recvVector(int socket) {
-  SizeType valueSize;
+  SizeType valueSize = 0;
   recvBytes(socket, &valueSize, 1);
   std::vector<T> value(valueSize);
   recvBytes(socket, value.data(), value.size());
@@ -716,7 +714,7 @@ inline void sendString(
 // receive a string as sent in sendString
 inline std::string recvString(int socket) {
-  SizeType valueSize;
+  SizeType valueSize = 0;
   recvBytes(socket, &valueSize, 1);
   std::vector<char> value(valueSize);
   recvBytes(socket, value.data(), value.size());
diff --git a/torch/csrc/distributed/c10d/comm.hpp b/torch/csrc/distributed/c10d/comm.hpp
index ee8db21c172..d2c608532ba 100644
--- a/torch/csrc/distributed/c10d/comm.hpp
+++ b/torch/csrc/distributed/c10d/comm.hpp
@@ -87,6 +87,7 @@ class TORCH_API GradBucket {
   std::vector<c10::IntArrayRef> sizes_vec_;

   // Model parameters for this bucket.
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const std::vector<at::Tensor> parameters_;

   // Predefined sparse indices for this bucket (only used for sparse tensors).
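Note: the fmt::format calls introduced in Utils.hpp above are the reason fmt::fmt-header-only was added to the link interfaces in the CMake changes at the top of this patch; every target that includes this header now needs fmt's headers on its include path. A small self-contained sketch of the pattern (illustrative only, not code from the patch):

#include <fmt/format.h>

#include <stdexcept>
#include <string>

// Mirrors the style of the reworked error messages: fmt::format instead of
// chained operator+ concatenation.
inline void checkSameType(const std::string& expected, const std::string& actual) {
  if (expected != actual) {
    // Before: "mixed types (" + expected + " and " + actual + ")"
    throw std::invalid_argument(
        fmt::format("mixed types ({} and {})", expected, actual));
  }
}

int main() {
  try {
    checkSameType("Float", "Double");
  } catch (const std::invalid_argument& e) {
    fmt::print("{}\n", e.what()); // prints: mixed types (Float and Double)
  }
  return 0;
}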
diff --git a/torch/csrc/distributed/c10d/default_comm_hooks.hpp b/torch/csrc/distributed/c10d/default_comm_hooks.hpp
index 683841f3ba8..a4f47f13bb5 100644
--- a/torch/csrc/distributed/c10d/default_comm_hooks.hpp
+++ b/torch/csrc/distributed/c10d/default_comm_hooks.hpp
@@ -5,7 +5,7 @@ namespace c10d {

-enum class BuiltinCommHookType {
+enum class BuiltinCommHookType : uint8_t {
   ALLREDUCE = 1,
   FP16_COMPRESS = 2,
 };
diff --git a/torch/csrc/distributed/c10d/intra_node_comm.hpp b/torch/csrc/distributed/c10d/intra_node_comm.hpp
index 0e65ebf8d69..ab27ecef973 100644
--- a/torch/csrc/distributed/c10d/intra_node_comm.hpp
+++ b/torch/csrc/distributed/c10d/intra_node_comm.hpp
@@ -14,9 +14,18 @@ constexpr size_t kDefaultBufferSize = 10ull * 1024 * 1024;
 using NvlMesh = std::array<std::array<size_t, kMaxDevices>, kMaxDevices>;
 using HybridCubeMesh = std::array<std::array<int, 4>, kMaxDevices>;

-enum class Topology { UNKNOWN = 0, FULLY_CONNECTED = 1, HYBRID_CUBE_MESH = 2 };
+enum class Topology : uint8_t {
+  UNKNOWN = 0,
+  FULLY_CONNECTED = 1,
+  HYBRID_CUBE_MESH = 2
+};

-enum class AllReduceAlgo { NONE = 0, ONE_SHOT = 1, TWO_SHOT = 2, HCM = 3 };
+enum class AllReduceAlgo : uint8_t {
+  NONE = 0,
+  ONE_SHOT = 1,
+  TWO_SHOT = 2,
+  HCM = 3
+};

 class TORCH_API IntraNodeComm : public c10::intrusive_ptr_target {
  public:
diff --git a/torch/csrc/distributed/c10d/reducer_timer.hpp b/torch/csrc/distributed/c10d/reducer_timer.hpp
index acd8975c4d2..5f57051455f 100644
--- a/torch/csrc/distributed/c10d/reducer_timer.hpp
+++ b/torch/csrc/distributed/c10d/reducer_timer.hpp
@@ -23,7 +23,7 @@ class TORCH_API Timer {
   int64_t backward_comm_end_time = kUnsetTime;

  public:
-  enum class Event {
+  enum class Event : uint8_t {
     kForwardStart,
     kBackwardComputeStart,
     kBackwardComputeEnd,
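Note: the explicit : uint8_t underlying types added to BackendType, BuiltinCommHookType, Topology, AllReduceAlgo, and Timer::Event shrink each enum from the scoped-enum default of int to a single byte, which is what clang-tidy's performance-enum-size check recommends. A minimal sketch of the effect (illustrative only):

#include <cstdint>
#include <iostream>

enum class DefaultSized { kA, kB, kC };               // underlying type defaults to int
enum class ByteSized : std::uint8_t { kA, kB, kC };   // underlying type is uint8_t

int main() {
  std::cout << sizeof(DefaultSized) << '\n'; // typically 4
  std::cout << sizeof(ByteSized) << '\n';    // 1
  return 0;
}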