From 9255ffc84145bdf6b849b1a297bdba25fe01d04e Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot
Date: Thu, 26 Dec 2024 17:13:10 +0000
Subject: [PATCH] Revert "Enable more C++ warnings (#143355)"

This reverts commit daa3ffe0ebff58577b8db964447b6abc6de53a25.

Reverted https://github.com/pytorch/pytorch/pull/143355 on behalf of
https://github.com/malfet due to It fails internal build system as it kind of
breaks separation between native and native/cpu
([comment](https://github.com/pytorch/pytorch/pull/143355#issuecomment-2562961546))
---
 .clang-format                                          |  2 --
 CMakeLists.txt                                         |  1 +
 aten/src/ATen/native/QuantizedLinear.cpp               |  3 ++-
 aten/src/ATen/native/RNN.cpp                           |  2 ++
 .../src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp |  2 ++
 aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp    |  9 +++++++--
 aten/src/ATen/native/quantized/cpu/fbgemm_utils.h      |  7 +------
 aten/src/ATen/native/quantized/cpu/qlinear.cpp         |  2 ++
 aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp |  2 ++
 aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp |  2 ++
 aten/src/ATen/native/quantized/cudnn/Conv.cpp          |  7 ++++++-
 aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp   |  7 ++++++-
 aten/src/ATen/native/quantized/library.cpp             | 10 +++++++++-
 aten/src/ATen/native/quantized/qconv_unpack.cpp        |  7 +++++++
 aten/src/ATen/native/quantized/qlinear_unpack.cpp      |  3 +++
 torch/csrc/distributed/c10d/GlooDeviceFactory.cpp      |  8 ++++----
 torch/csrc/distributed/rpc/tensorpipe_agent.cpp        |  6 +++---
 torch/csrc/distributed/rpc/tensorpipe_cuda.cpp         |  8 ++++----
 torch/csrc/jit/passes/onnx/constant_map.h              |  5 +++++
 torch/csrc/jit/serialization/export.cpp                |  6 +++++-
 20 files changed, 73 insertions(+), 26 deletions(-)

diff --git a/.clang-format b/.clang-format
index 2e516150410..0b94540e7a2 100644
--- a/.clang-format
+++ b/.clang-format
@@ -106,8 +106,6 @@ StatementMacros:
   - C10_DEFINE_int32
   - C10_DEFINE_int64
   - C10_DEFINE_string
-  - C10_DEFINE_REGISTRY_WITHOUT_WARNING
-  - C10_REGISTER_CREATOR
   - DEFINE_BINARY
   - PyObject_HEAD
   - PyObject_VAR_HEAD
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b43137523cf..c3ba29a5009 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1057,6 +1057,7 @@ if(NOT MSVC)
   append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable"
                                CMAKE_CXX_FLAGS)
+  append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS)

   if(${USE_COLORIZE_OUTPUT})
diff --git a/aten/src/ATen/native/QuantizedLinear.cpp b/aten/src/ATen/native/QuantizedLinear.cpp
index 6d6c6909fe3..e2f3f06f64c 100644
--- a/aten/src/ATen/native/QuantizedLinear.cpp
+++ b/aten/src/ATen/native/QuantizedLinear.cpp
@@ -1,4 +1,5 @@
 #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+#include
 #include
 #include
@@ -115,7 +116,7 @@ Tensor fbgemm_linear_int8_weight_fp32_activation(
   const Tensor bias_contig = bias.contiguous();

   // Allocate output Tensor and a buffer for fbgemmPacked to use
-  auto output_size = input.sizes().vec();
+  std::vector output_size = input.sizes().vec();
   output_size.back() = N;
   Tensor output = at::empty(output_size, input.options().dtype(at::kFloat), LEGACY_CONTIGUOUS_MEMORY_FORMAT);
   Tensor buffer = at::empty(output_size, input.options().dtype(at::kInt), LEGACY_CONTIGUOUS_MEMORY_FORMAT);
diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp
index bd503854899..eee703ce473 100644
--- a/aten/src/ATen/native/RNN.cpp
+++ b/aten/src/ATen/native/RNN.cpp
@@ -62,6 +62,8 @@
 #include
 #endif

+int register_linear_params();
+
 namespace at::native {

 namespace {
diff --git a/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp b/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
index c9c09cf2464..df74b10d70f 100644
--- a/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
+++ b/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
@@ -19,6 +19,8 @@
 #include
 #endif

+int register_linear_params();
+
 #ifdef USE_FBGEMM
 std::tuple> PackedLinearWeight::unpack() {
   auto packB = w.get();
diff --git a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp
index 33edbfac1fc..fc77d44e18c 100644
--- a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp
+++ b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp
@@ -28,6 +28,7 @@
 #include
 #endif

+int register_embedding_params();

 #ifdef USE_FBGEMM

@@ -380,7 +381,9 @@ namespace {
   }
 }

-template int register_conv_params() {
+template
+TORCH_API int
+register_conv_params() {
   static auto register_conv_params =
       torch::selective_class_>(
           "quantized", TORCH_SELECTIVE_CLASS(_hack_int_to_class_name(kSpatialDim)))
@@ -417,7 +420,9 @@ TORCH_API int register_conv_params<2>();
 template TORCH_API int register_conv_params<3>();

-int register_linear_params() {
+TORCH_API int register_linear_params();
+
+TORCH_API int register_linear_params() {
   using SerializationType = std::tuple>;
   static auto register_linear_params =
       torch::selective_class_(
diff --git a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h
index 62a21c439b7..05d63c8476a 100644
--- a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h
+++ b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h
@@ -6,7 +6,7 @@
 #include
 #include

-#if defined(USE_FBGEMM) && __has_include()
+#ifdef USE_FBGEMM
 #include
 C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Winconsistent-missing-destructor-override")
 #include
@@ -407,8 +407,3 @@ struct TORCH_API PackedEmbeddingBagWeight : public EmbeddingPackedParamsBase {
       bool include_last_offset,
       bool is_embedding_op) override;
 };
-
-TORCH_API int register_linear_params();
-int register_embedding_params();
-
-template TORCH_API int register_conv_params();
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
index 05be6ebbf8d..27139bcb40c 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
@@ -31,6 +31,8 @@
 #include
 #include

+int register_linear_params();
+
 #ifdef USE_FBGEMM
 template
 at::Tensor& PackedLinearWeight::apply_impl(
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
index 27e1b24d236..091e309cd95 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
@@ -29,6 +29,8 @@
 #include
 #include

+int register_linear_params();
+
 #ifdef USE_FBGEMM
 template
 at::Tensor PackedLinearWeight::apply_dynamic_impl(
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
index d4055203705..3e4ce69a899 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
@@ -31,6 +31,8 @@
 #include
 #include

+int register_linear_params();
+
 #ifdef USE_FBGEMM
 namespace {
 // Calculate the column offsets.
diff --git a/aten/src/ATen/native/quantized/cudnn/Conv.cpp b/aten/src/ATen/native/quantized/cudnn/Conv.cpp
index dff887118ee..d37f674f5c7 100644
--- a/aten/src/ATen/native/quantized/cudnn/Conv.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Conv.cpp
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -23,6 +22,12 @@
 #include
 #include

+template
+int register_conv_params();
+
+extern template int register_conv_params<2>();
+extern template int register_conv_params<3>();
+
 // TODO: there is a table from input dtype and weight dtype to operator qdtype,
 // we can derive the operator dtype based on input dtype
 cudnn_frontend::ConvDesc_v8 getConvDescriptor(cudnnDataType_t dataType, c10::IntArrayRef padding, c10::IntArrayRef stride, c10::IntArrayRef dilation) {
diff --git a/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp b/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp
index da46dccab31..9103bdd0d41 100644
--- a/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp
@@ -5,7 +5,6 @@
 #include
 #include

-#include
 #include
 #include
 #include
@@ -16,6 +15,12 @@

 #include

+template
+int register_conv_params();
+
+extern template int register_conv_params<2>();
+extern template int register_conv_params<3>();
+
 template
 c10::intrusive_ptr> PackedConvWeightCudnn<
     kSpatialDim>::
diff --git a/aten/src/ATen/native/quantized/library.cpp b/aten/src/ATen/native/quantized/library.cpp
index 7d2ed9f42e0..72dcda2b74d 100644
--- a/aten/src/ATen/native/quantized/library.cpp
+++ b/aten/src/ATen/native/quantized/library.cpp
@@ -1,5 +1,13 @@
 #include
-#include
+
+int register_linear_params();
+
+template
+int register_conv_params();
+
+extern template int register_conv_params<2>();
+extern template int register_conv_params<3>();
+int register_embedding_params();

 TORCH_LIBRARY(quantized, m) {
   m.set_python_module("caffe2.torch.fb.model_transform.splitting.split_dispatcher");
diff --git a/aten/src/ATen/native/quantized/qconv_unpack.cpp b/aten/src/ATen/native/quantized/qconv_unpack.cpp
index f613097fdba..f33bd6cf96f 100644
--- a/aten/src/ATen/native/quantized/qconv_unpack.cpp
+++ b/aten/src/ATen/native/quantized/qconv_unpack.cpp
@@ -28,6 +28,13 @@ and /cudnn/ConvUnpackImpl.cpp, for cudnn.
 #include
 #endif

+template
+int register_conv_params();
+
+extern template int register_conv_params<2>();
+extern template int register_conv_params<3>();
+
+
 namespace at::native {
 namespace {
diff --git a/aten/src/ATen/native/quantized/qlinear_unpack.cpp b/aten/src/ATen/native/quantized/qlinear_unpack.cpp
index 2999e183b9e..a485094df3c 100644
--- a/aten/src/ATen/native/quantized/qlinear_unpack.cpp
+++ b/aten/src/ATen/native/quantized/qlinear_unpack.cpp
@@ -13,6 +13,9 @@ and /cudnn/linear_unpack_impl.cpp, for cudnn.
 #include
 #include

+int register_linear_params();
+
+
 namespace at::native {
 namespace {
diff --git a/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp b/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp
index 478b73e0993..47a9a02ae81 100644
--- a/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp
+++ b/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp
@@ -96,7 +96,7 @@ static std::shared_ptr<::gloo::transport::Device> makeTCPTLSDevice(
       attr, pkey, cert, caFile, caPath);
 }

-C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP_TLS, makeTCPTLSDevice)
+C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP_TLS, makeTCPTLSDevice);
 #endif

 #if GLOO_HAVE_TRANSPORT_UV
@@ -120,9 +120,9 @@ static std::shared_ptr<::gloo::transport::Device> makeUVDevice(
 // Registry priority is per key identifier. We register UV to `APPLE` for
 // the flexibility of other application to override by priority. Register
 // UV to `UV` for env "GLOO_DEVICE_TRANSPORT" override.
-C10_REGISTER_CREATOR(GlooDeviceRegistry, APPLE, makeUVDevice)
-C10_REGISTER_CREATOR(GlooDeviceRegistry, WIN32, makeUVDevice)
-C10_REGISTER_CREATOR(GlooDeviceRegistry, UV, makeUVDevice)
+C10_REGISTER_CREATOR(GlooDeviceRegistry, APPLE, makeUVDevice);
+C10_REGISTER_CREATOR(GlooDeviceRegistry, WIN32, makeUVDevice);
+C10_REGISTER_CREATOR(GlooDeviceRegistry, UV, makeUVDevice);
 #endif

 namespace {
diff --git a/torch/csrc/distributed/rpc/tensorpipe_agent.cpp b/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
index 9801a0327dd..a08aaa4576f 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
@@ -153,11 +153,11 @@ void makeStreamsWaitOnOthers(

 C10_DEFINE_REGISTRY_WITHOUT_WARNING(
     TensorPipeTransportRegistry,
-    TransportRegistration)
+    TransportRegistration);

 C10_DEFINE_REGISTRY_WITHOUT_WARNING(
     TensorPipeChannelRegistry,
-    ChannelRegistration)
+    ChannelRegistration);

 const std::string& TensorPipeAgent::guessAddress() {
   static const std::string uvAddress = []() {
@@ -284,7 +284,7 @@ std::unique_ptr makeMultiplexedUvChannel() {
 C10_REGISTER_CREATOR(
     TensorPipeChannelRegistry,
     mpt_uv,
-    makeMultiplexedUvChannel)
+    makeMultiplexedUvChannel);

 } // namespace
diff --git a/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp b/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
index 4c326b6a0e2..97341a41899 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
@@ -24,7 +24,7 @@ std::unique_ptr makeCudaIpcChannel() {
 }

 // The cuda_ipc channels use cudaMemcpy to transmit CUDA tensor across processes
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_ipc, makeCudaIpcChannel)
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_ipc, makeCudaIpcChannel);

 #endif

@@ -44,7 +44,7 @@ std::unique_ptr makeCudaGdrChannel() {
 // in order to ensure readiness and to agree on the device indices and thus the
 // queue pair to use. It automatically pairs each GPU to the "closest" NIC if
 // there are multiple of them (closest = longest prefix match in PCI tree).
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel)
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel);

 #endif

@@ -55,7 +55,7 @@ std::unique_ptr makeCudaXthChannel() {
 }

 // The cuda_xth channel supports same-process GPU-to-GPU comm
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel)
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel);

 std::unique_ptr makeCudaBasicChannel() {
   auto context = tensorpipe::channel::cuda_basic::create(
@@ -68,7 +68,7 @@ std::unique_ptr makeCudaBasicChannel() {
 C10_REGISTER_CREATOR(
     TensorPipeChannelRegistry,
     cuda_basic,
-    makeCudaBasicChannel)
+    makeCudaBasicChannel);

 class TensorpipeCudaConverter : public TensorpipeDeviceTypeConverter {
  public:
diff --git a/torch/csrc/jit/passes/onnx/constant_map.h b/torch/csrc/jit/passes/onnx/constant_map.h
index 7b447fa7423..60d4470c1b1 100644
--- a/torch/csrc/jit/passes/onnx/constant_map.h
+++ b/torch/csrc/jit/passes/onnx/constant_map.h
@@ -2,10 +2,15 @@

 #include

+C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override")
+C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wnewline-eof")
 #include
+C10_DIAGNOSTIC_POP()
+C10_DIAGNOSTIC_POP()
 #include
 #include
+#include
 #include

 namespace torch::jit {
diff --git a/torch/csrc/jit/serialization/export.cpp b/torch/csrc/jit/serialization/export.cpp
index d95c45a7e6b..84c0a46a774 100644
--- a/torch/csrc/jit/serialization/export.cpp
+++ b/torch/csrc/jit/serialization/export.cpp
@@ -19,14 +19,18 @@
 #include
 #include
 #include
+#include
+C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wnewline-eof")
 #include
+C10_DIAGNOSTIC_POP()
 #include
 #include
+C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override")
 #include
+C10_DIAGNOSTIC_POP()
 #include
-#include
 #include
 #include
 #include
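
The recurring edit across the quantized .cpp files above drops a cross-directory
#include and instead forward-declares the registration entry point in each
consuming translation unit, which is what keeps native/ and native/cpu/ separable
at build time (the separation cited in the revert reason). Below is a minimal,
self-contained sketch of that forward-declaration idiom; the file roles and the
static-initializer trigger are hypothetical illustrations, not the actual PyTorch
registration code.

// registration.cpp (hypothetical): the entry point is defined once, here.
#include <iostream>

int register_linear_params() {
  // The real function registers a custom class; this stub only reports it ran.
  std::cout << "register_linear_params() ran\n";
  return 0;
}

// consumer.cpp (hypothetical): no header from another directory is included;
// the function is simply forward-declared, mirroring the lines added above.
int register_linear_params();

namespace {
// A static initializer just makes the sketch observable when the program runs;
// the real consumers call the declared function where they need it.
const int kLinearParamsRegistered = register_linear_params();
} // namespace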