some reference and move fixes (#95942)

This PR introduces the following changes (short sketches of these patterns appear below):
1. Const function parameters that can be passed by reference are changed to take const references.
2. More opportunities for passing by value (and then moving) are identified, and the parameters are changed accordingly.
3. Some use-after-move errors are fixed.
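
For reference, here is a minimal C++ sketch of patterns 1 and 2; pattern 3 is illustrated further down, next to the hunk that fixes a use-after-move in OnnxifiTransformer. The sketch is not taken from the diff; totalLength and Record are hypothetical names used only to show the idioms.

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// (1) A parameter that is only read can be a const reference, avoiding a copy.
static std::size_t totalLength(const std::vector<std::string>& names) {  // instead of: const std::vector<std::string> names
  std::size_t total = 0;
  for (const auto& n : names) {
    total += n.size();
  }
  return total;
}

// (2) A parameter that is stored into a member is taken by value and moved
//     (the modernize-pass-by-value idiom): lvalue callers pay one copy,
//     rvalue callers pay none.
struct Record {
  explicit Record(std::string name) : name_(std::move(name)) {}  // instead of: const std::string& name
  std::string name_;
};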

Pull Request resolved: https://github.com/pytorch/pytorch/pull/95942
Approved by: https://github.com/Skylion007
cyy 2023-03-10 03:44:09 +00:00 committed by PyTorch MergeBot
parent 6e0359dd42
commit d0e4ca233e
30 changed files with 46 additions and 60 deletions

View File

@@ -476,7 +476,7 @@ void sync(ITensorListRef t_list) {
sync(t);
}
}
-void sync(const c10::List<c10::optional<Tensor>> t_list) {
+void sync(const c10::List<c10::optional<Tensor>>& t_list) {
for (const auto i : c10::irange(t_list.size())) {
sync(t_list[i]);
}

View File

@@ -213,7 +213,7 @@ TORCH_API std::vector<Tensor> from_functional_tensor(ITensorListRef t_list);
TORCH_API void sync(const at::Tensor& t);
TORCH_API void sync(const c10::optional<Tensor>& t);
-TORCH_API void sync(const c10::List<c10::optional<Tensor>> t_list);
+TORCH_API void sync(const c10::List<c10::optional<Tensor>>& t_list);
TORCH_API void sync(ITensorListRef t_list);
TORCH_API void replace_(const Tensor& functional_tensor, const Tensor& other);

View File

@@ -824,8 +824,8 @@ struct TORCH_API TensorType : public SharedType {
TensorType(
c10::optional<at::ScalarType> scalar_type,
c10::optional<Device> device,
-const SymbolicShape& sizes,
-const VaryingShape<Stride>& strides,
+SymbolicShape sizes,
+VaryingShape<Stride> strides,
c10::optional<bool> requires_grad,
c10::optional<bool> undefined = false);

View File

@@ -424,16 +424,15 @@ VaryingShape<int64_t> TensorType::strides() const {
TensorType::TensorType(
c10::optional<at::ScalarType> scalar_type,
c10::optional<Device> device,
-// NOLINTNEXTLINE(modernize-pass-by-value)
-const SymbolicShape& sizes,
-const VaryingShape<Stride>& strides,
+SymbolicShape sizes,
+VaryingShape<Stride> strides,
c10::optional<bool> requires_grad,
c10::optional<bool> undefined)
: SharedType(TypeKind::TensorType),
scalar_type_(scalar_type),
device_(device),
-sizes_(sizes),
-strides_(strides),
+sizes_(std::move(sizes)),
+strides_(std::move(strides)),
requires_grad_(requires_grad),
undefined_(undefined) {}

View File

@@ -29,7 +29,7 @@ inline scalar_t vec_reduce_all(
template <typename scalar_t, typename Op>
struct VecReduceAllSIMD {
-static inline scalar_t apply(const Op& vec_fun, Vectorized<scalar_t> acc_vec) {
+static inline scalar_t apply(const Op& vec_fun, const Vectorized<scalar_t>& acc_vec) {
return vec_reduce_all(vec_fun, acc_vec, Vectorized<scalar_t>::size());
}
};
@@ -38,7 +38,7 @@ struct VecReduceAllSIMD {
#if defined(CPU_CAPABILITY_AVX2)
template <typename Op>
struct VecReduceAllSIMD<float, Op> {
-static inline float apply(const Op& vec_fun, Vectorized<float> acc_vec) {
+static inline float apply(const Op& vec_fun, const Vectorized<float>& acc_vec) {
using Vec = Vectorized<float>;
Vec v = acc_vec;
// 128-bit shuffle
@@ -57,7 +57,7 @@ struct VecReduceAllSIMD<float, Op> {
#if defined(CPU_CAPABILITY_AVX512)
template <typename Op>
struct VecReduceAllSIMD<float, Op> {
-static inline float apply(const Op& vec_fun, Vectorized<float> acc_vec) {
+static inline float apply(const Op& vec_fun, const Vectorized<float>& acc_vec) {
using Vec = Vectorized<float>;
Vec v = acc_vec;
// 256-bit shuffle
@@ -79,7 +79,7 @@ struct VecReduceAllSIMD<float, Op> {
#endif // defined(__GNUC__) && (__GNUC__ > 5) && !defined(_MSC_VER) && !defined(C10_MOBILE)
template <typename scalar_t, typename Op>
-inline scalar_t vec_reduce_all(const Op& vec_fun, Vectorized<scalar_t> acc_vec) {
+inline scalar_t vec_reduce_all(const Op& vec_fun, const Vectorized<scalar_t>& acc_vec) {
return VecReduceAllSIMD<scalar_t, Op>::apply(vec_fun, acc_vec);
}

View File

@@ -71,7 +71,7 @@ bool isBatchedAtLevel(ITensorListRef tensors, int64_t level) {
return false;
}
-bool isBatchedAtLevel(const c10::List<c10::optional<Tensor>> maybe_tensors, int64_t level) {
+bool isBatchedAtLevel(const c10::List<c10::optional<Tensor>>& maybe_tensors, int64_t level) {
for (const auto idx : c10::irange(0, maybe_tensors.size())) {
const auto& maybe_tensor = maybe_tensors.get(idx);
if (isBatchedAtLevel(maybe_tensor, level)) {

View File

@@ -42,7 +42,7 @@ TORCH_API std::vector<Tensor> makeBatchedVector(const std::vector<Tensor>& tenso
// Returns True if ANY tensor in tensors is batched at level
TORCH_API bool isBatchedAtLevel(ITensorListRef tensors, int64_t level);
-TORCH_API bool isBatchedAtLevel(const c10::List<c10::optional<Tensor>> maybe_tensors, int64_t level);
+TORCH_API bool isBatchedAtLevel(const c10::List<c10::optional<Tensor>>& maybe_tensors, int64_t level);
TORCH_API bool isBatchedAtLevel(const Tensor& tensor, int64_t level);
TORCH_API bool isBatchedAtLevel(const c10::optional<Tensor>& maybe_tensor, int64_t level);

View File

@@ -78,7 +78,7 @@ static inline void checkInBoundsForStorage(
ArrayRef<T> size,
ArrayRef<T> stride,
T storage_offset,
-const caffe2::TypeMeta data_type,
+const caffe2::TypeMeta& data_type,
const Storage& new_storage) {
T storage_size_bytes =
at::detail::computeStorageNbytes(size, stride, data_type.itemsize());

View File

@@ -133,7 +133,7 @@ void _sparse_binary_op_intersection_kernel_impl(
Tensor& res,
const Tensor& x_,
const Tensor& y_,
-const std::vector<int64_t> broadcasted_shape,
+const std::vector<int64_t>& broadcasted_shape,
const bool restrict_indices_to_rhs = false,
const bool distributive_with_sum = true
) {

View File

@@ -6,20 +6,18 @@ QTensorImpl::QTensorImpl(
Storage&& storage,
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
-// NOLINTNEXTLINE(modernize-pass-by-value)
QuantizerPtr quantizer)
-: TensorImpl(std::move(storage), key_set, data_type),
-quantizer_(quantizer) {}
+: TensorImpl(std::move(storage), std::move(key_set), data_type),
+quantizer_(std::move(quantizer)) {}
QTensorImpl::QTensorImpl(
ImplType type,
Storage&& storage,
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
-// NOLINTNEXTLINE(modernize-pass-by-value)
QuantizerPtr quantizer)
-: TensorImpl(type, std::move(storage), key_set, data_type),
-quantizer_(quantizer) {}
+: TensorImpl(type, std::move(storage), std::move(key_set), data_type),
+quantizer_(std::move(quantizer)) {}
const char* QTensorImpl::tensorimpl_type_name() const {
return "QTensorImpl";

View File

@@ -2105,7 +2105,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
* If the existing data does not match the desired type, it will be deleted
* and a new storage will be created.
*/
-inline void* raw_mutable_data(const caffe2::TypeMeta meta) {
+inline void* raw_mutable_data(const caffe2::TypeMeta& meta) {
// For 0-size tensors it's fine to return any pointer (including nullptr)
if (data_type_ == meta && storage_initialized()) {
return static_cast<void*>(

View File

@@ -613,7 +613,7 @@ inline TensorOptions dtype() {
return dtype(caffe2::TypeMeta::Make<T>());
}
-inline std::string toString(const TensorOptions options) {
+inline std::string toString(const TensorOptions& options) {
std::ostringstream stream;
stream << options;
return stream.str();
@@ -763,7 +763,7 @@ inline TensorOptions dispatchKeyToTensorOptions(DispatchKey dispatch_key) {
}
namespace detail {
-inline bool backend_supports_empty_operator(const TensorOptions options) {
+inline bool backend_supports_empty_operator(const TensorOptions& options) {
// Quantized backends don't support at::empty().
// They have separate operators like at::empty_quantized() that take in
// extra information about how to quantize the tensor.

View File

@@ -422,7 +422,7 @@ class C10_API TypeMeta final {
return data().name_;
}
-friend bool operator==(const TypeMeta lhs, const TypeMeta rhs) noexcept;
+friend bool operator==(const TypeMeta& lhs, const TypeMeta& rhs) noexcept;
template <typename T>
bool Match() const noexcept {
@@ -597,10 +597,10 @@ C10_EXPORT constexpr uint16_t TypeMeta::_typeMetaData<
inline TypeMeta::TypeMeta() noexcept
: index_(_typeMetaData<detail::_Uninitialized>()) {}
-inline bool operator==(const TypeMeta lhs, const TypeMeta rhs) noexcept {
+inline bool operator==(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
return (lhs.index_ == rhs.index_);
}
-inline bool operator!=(const TypeMeta lhs, const TypeMeta rhs) noexcept {
+inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
return !operator==(lhs, rhs);
}

View File

@@ -624,7 +624,6 @@ bool ExecuteStepRecursive(ExecutionStepWrapper& stepWrapper) {
}
if (compiledStep->gotFailure) {
LOG(ERROR) << "One of the workers failed.";
-// NOLINTNEXTLINE(bugprone-use-after-move)
if (first_exception) {
first_exception.rethrowException();
}

View File

@@ -888,14 +888,14 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaOnnx(
VLOG(2) << "Adding extra init tensor: " << t.name();
TensorShape shape;
shape.mutable_dims()->CopyFrom(t.dims());
+auto dims_size = shape.dims_size();
auto ret = shape_hints_onnx_.emplace(t.name(), std::move(shape));
shape_hints_max_bs->emplace(
std::piecewise_construct,
std::forward_as_tuple(ret.first->first),
std::forward_as_tuple(
std::vector<TensorBoundShape::DimType>(
-// NOLINTNEXTLINE(bugprone-use-after-move)
-shape.dims_size(), TensorBoundShape_DimType_CONSTANT),
+dims_size, TensorBoundShape_DimType_CONSTANT),
ret.first->second));
// Feed into workspace as CPU Tensors
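
The hunk above is representative of the use-after-move fixes in this PR: the needed value (dims_size) is read from shape before std::move(shape) hands it to emplace. A standalone sketch of the same pattern, with hypothetical names (Shape, insertHint) rather than the actual caffe2 types, might look like:

#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for the protobuf TensorShape used in the hunk above.
struct Shape {
  std::vector<long> dims;
  int dims_size() const { return static_cast<int>(dims.size()); }
};

void insertHint(std::map<std::string, Shape>& hints, std::string name, Shape shape) {
  // Read the size before the move; after std::move(shape) the object is in a
  // moved-from state and shape.dims_size() would trigger bugprone-use-after-move.
  const int dims_size = shape.dims_size();
  hints.emplace(std::move(name), std::move(shape));
  // Use the saved value instead of the moved-from object.
  std::vector<int> dim_types(dims_size, /*fill=*/0);
  (void)dim_types;
}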

View File

@@ -329,14 +329,13 @@ class ChunkDataset final
ChunkSampler chunk_sampler,
ExampleSampler example_sampler,
ChunkDatasetOptions options,
-// NOLINTNEXTLINE(modernize-pass-by-value)
std::function<void(UnwrappedBatchType&)> preprocessing_policy =
std::function<void(UnwrappedBatchType&)>())
: chunk_reader_(std::move(chunk_reader)),
chunk_sampler_(std::move(chunk_sampler)),
example_sampler_(std::move(example_sampler)),
options_(std::move(options)),
-preprocessing_policy_(preprocessing_policy),
+preprocessing_policy_(std::move(preprocessing_policy)),
quit_worker_(false),
running_preloaders_(0),
load_checkpoint_(false) {}

View File

@@ -22,11 +22,10 @@ void check_single_result(
namespace torch {
namespace autograd {
-// NOLINTNEXTLINE(modernize-pass-by-value)
CppFunctionTensorPreHook::CppFunctionTensorPreHook(
-const std::shared_ptr<hooks_list>& hooks,
+std::shared_ptr<hooks_list> hooks,
int value_idx)
-: hooks_(hooks), value_idx_(value_idx) {}
+: hooks_(std::move(hooks)), value_idx_(value_idx) {}
variable_list CppFunctionTensorPreHook::operator()(
const variable_list& values) {

View File

@@ -10,9 +10,7 @@ using hooks_list =
std::vector<std::function<at::TensorBase(const at::TensorBase&)>>;
struct CppFunctionTensorPreHook : public FunctionPreHook {
-CppFunctionTensorPreHook(
-const std::shared_ptr<hooks_list>& hooks,
-int value_idx);
+CppFunctionTensorPreHook(std::shared_ptr<hooks_list> hooks, int value_idx);
variable_list operator()(const variable_list& values) override;
std::shared_ptr<hooks_list> hooks_;

View File

@@ -930,7 +930,6 @@ static variable_list call_function(
});
if (has_post_hooks) {
-// NOLINTNEXTLINE(bugprone-use-after-move)
return call_post_hooks(fn, std::move(outputs), inputs);
}
return outputs;

View File

@@ -109,7 +109,7 @@ inline bool isFwGradDefinedTensorList(const at::ITensorListRef& variables) {
}
inline bool isFwGradDefinedTensorList(
-const c10::List<c10::optional<at::Tensor>> li) {
+const c10::List<c10::optional<at::Tensor>>& li) {
bool ret = false;
for (auto i : c10::irange(li.size())) {
auto t = li.get(i);

View File

@@ -41,7 +41,7 @@ struct InputMetadata {
InputMetadata() = default;
InputMetadata(
-const at::TensorOptions options,
+const at::TensorOptions& options,
MetadataShape input_shape,
bool is_tensor_subclass)
: options_{options},

View File

@@ -160,9 +160,8 @@ class TORCH_API DistEngine {
// Guard to clean up resources once the backward pass is done.
class BackwardPassCleanupGuard {
public:
-// NOLINTNEXTLINE(modernize-pass-by-value)
-explicit BackwardPassCleanupGuard(const ContextPtr& autogradContext)
-: autogradContext_(autogradContext) {}
+explicit BackwardPassCleanupGuard(ContextPtr autogradContext)
+: autogradContext_(std::move(autogradContext)) {}
~BackwardPassCleanupGuard() {
DistEngine::getInstance().cleanupBackwardPass(autogradContext_);

View File

@@ -82,11 +82,9 @@ TORCH_API extern mutexType currentStateStackEntryMutex;
class StateStackEntry {
public:
StateStackEntry(
-// NOLINTNEXTLINE(modernize-pass-by-value)
std::shared_ptr<StateStackEntry> prevPtr,
-// NOLINTNEXTLINE(modernize-pass-by-value)
std::shared_ptr<State> statePtr)
-: prevPtr_(prevPtr), statePtr_(statePtr) {}
+: prevPtr_(std::move(prevPtr)), statePtr_(std::move(statePtr)) {}
static void pushRange(std::shared_ptr<State> profilerProcessGlobalStatePtr);
static std::shared_ptr<State> popRange();

View File

@@ -314,8 +314,8 @@ class TORCH_API TensorPipeAgent : public RpcAgent {
// TODO: To achieve better performance we can have a pipe pool per
// client that can be configured using RpcBackendOptions.
struct ClientPipe {
-// NOLINTNEXTLINE(modernize-pass-by-value)
-explicit ClientPipe(std::shared_ptr<tensorpipe::Pipe> pipe) : pipe_(pipe) {}
+explicit ClientPipe(std::shared_ptr<tensorpipe::Pipe> pipe)
+: pipe_(std::move(pipe)) {}
std::shared_ptr<tensorpipe::Pipe> pipe_;
mutable std::mutex mutex_;
bool inError_{false};
@@ -359,11 +359,10 @@ class TORCH_API TensorPipeAgent : public RpcAgent {
struct TimeoutMessageMetadata {
TimeoutMessageMetadata(
uint64_t messageId_,
-// NOLINTNEXTLINE(modernize-pass-by-value)
std::shared_ptr<AtomicJitFuture> responseFuture_,
std::chrono::milliseconds timeout_)
: messageId(messageId_),
-responseFuture(responseFuture_),
+responseFuture(std::move(responseFuture_)),
timeout(timeout_) {}
uint64_t messageId;
std::shared_ptr<AtomicJitFuture> responseFuture;

View File

@@ -62,10 +62,9 @@ class CompilationUnit {
class TORCH_API Module {
public:
Module(
-// NOLINTNEXTLINE(modernize-pass-by-value)
c10::intrusive_ptr<c10::ivalue::Object> object,
std::shared_ptr<CompilationUnit> cu)
-: object_(object), cu_(std::move(cu)) {}
+: object_(std::move(object)), cu_(std::move(cu)) {}
Module() = default;
Method get_method(const std::string& method_name) const;
template <typename... Types>

View File

@@ -75,7 +75,7 @@ c10::optional<BackendDevice> GetBackendDevice(const at::Tensor& tensor) {
}
c10::optional<BackendDevice> GetBackendDevice(
-const c10::optional<c10::Device> device) {
+const c10::optional<c10::Device>& device) {
if (device) {
return c10::make_optional(atenDeviceToBackendDevice(*device));
}

View File

@@ -560,7 +560,7 @@ void LazyGraphExecutor::Async::Wait() {
}
}
-bool LazyGraphExecutor::ShouldSyncTensor(const LazyTensorPtr tensor) const {
+bool LazyGraphExecutor::ShouldSyncTensor(const LazyTensorPtr& tensor) const {
return tensor->GetIrValue()->op() != ltc_not_supported;
}

View File

@@ -348,7 +348,7 @@ class TORCH_API LazyGraphExecutor {
std::vector<BackendDataPtr> parameters_data;
};
-virtual bool ShouldSyncTensor(const LazyTensorPtr tensor) const;
+virtual bool ShouldSyncTensor(const LazyTensorPtr& tensor) const;
SyncTensorCollection CollectSyncTensors(
const std::vector<LazyTensorPtr>& tensors,

View File

@@ -54,7 +54,7 @@ static const char* backend_to_string(const at::Backend& backend) {
}
}
-std::string options_to_string(const at::TensorOptions options) {
+std::string options_to_string(const at::TensorOptions& options) {
std::ostringstream ss;
ss << backend_to_string(options.backend()) << "."
<< toString(at::typeMetaToScalarType(options.dtype())) << "Tensor";

View File

@@ -8,7 +8,7 @@
namespace torch {
namespace utils {
-std::string options_to_string(const at::TensorOptions options);
+std::string options_to_string(const at::TensorOptions& options);
std::string type_to_string(const at::DeprecatedTypeProperties& type);
at::TensorOptions options_from_string(const std::string& str);