[2/N] Enable clang-tidy checks in torch/csrc/profiler (#113439)

Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/113439 Approved by: https://github.com/Skylion007
2025-12-07 12:21:27 +01:00 · 2023-11-14 00:39:54 +00:00 · 2023-11-14 00:39:54 +00:00 · ff82dcd8fa
commit ff82dcd8fa
parent a43c757275
16 changed files with 30 additions and 23 deletions
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@ -266,7 +266,6 @@ exclude_patterns = [
    'torch/csrc/jit/serialization/export.cpp',
    'torch/csrc/lazy/**/*',
    'torch/csrc/onnx/init.cpp',
-    'torch/csrc/profiler/**/*',
    'torch/csrc/quantized/**/*',
    'torch/csrc/mps/**/*',
 ]
--- a/torch/csrc/profiler/collection.h
+++ b/torch/csrc/profiler/collection.h
@ -47,9 +47,9 @@ struct TORCH_API RawTensorMetadataBase {
  explicit RawTensorMetadataBase(const at::Tensor& t);

  StorageImplData data_;
-  c10::ScalarType dtype_;
-  c10::Layout layout_;
-  uint32_t dim_;
+  c10::ScalarType dtype_{c10::ScalarType::Undefined};
+  c10::Layout layout_{c10::Layout::Strided};
+  uint32_t dim_{0};
 };

 // Collected during profiling.
@ -64,8 +64,8 @@ struct TORCH_API RawTensorMetadata : RawTensorMetadataBase {
  // Wrap `weak_self_` in `c10::optional` and split device into components to
  // keep struct default constructable. (which the std::array initializer needs)
  c10::optional<WeakTensor> weak_self_;
-  c10::DeviceType device_type_;
-  c10::DeviceIndex device_index_;
+  c10::DeviceType device_type_{c10::DeviceType::CPU};
+  c10::DeviceIndex device_index_{-1};
 };

 // Used during post processing.
@ -367,8 +367,8 @@ struct TORCH_API Result : public std::enable_shared_from_this<Result> {
  template <typename T, typename Fn>
  void visit_if_base(Fn&& fn) const {
    visit([&](const auto& extra_fields) {
-      using extra_fields_t = typename std::remove_cv<
-          typename std::remove_reference<decltype(extra_fields)>::type>::type;
+      using extra_fields_t = typename std::remove_cv_t<
+          typename std::remove_reference_t<decltype(extra_fields)>>;

      if constexpr (std::is_base_of_v<T, extra_fields_t>) {
        fn(extra_fields);
--- a/torch/csrc/profiler/combined_traceback.cpp
+++ b/torch/csrc/profiler/combined_traceback.cpp
@ -90,6 +90,7 @@ SymbolizedTracebacks symbolize(
  for (const auto& e : to_symbolize) {
    if (e->python_) {
      if (cur_python != e->python_ && !cur_py_frames.empty()) {
+        // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
        cur_python->appendSymbolized(cur_py_frames, r);
        cur_py_frames.clear();
      }
@ -103,11 +104,13 @@ SymbolizedTracebacks symbolize(
    }
  }
  if (!cur_py_frames.empty()) {
+    // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
    cur_python->appendSymbolized(cur_py_frames, r);
    cur_py_frames.clear();
  }
  std::vector<std::vector<uint64_t>> python_frame_fragments =
      std::move(r.tracebacks);
+  r.tracebacks = {};

  for (const auto& sc : to_symbolize) {
    r.tracebacks.emplace_back();
--- a/torch/csrc/profiler/combined_traceback.h
+++ b/torch/csrc/profiler/combined_traceback.h
@ -29,7 +29,7 @@ struct TORCH_API CapturedTraceback : public c10::GatheredContext {
  CapturedTraceback() = default;
  CapturedTraceback(const CapturedTraceback&) = delete;
  CapturedTraceback& operator=(const CapturedTraceback&) = delete;
-  ~CapturedTraceback();
+  ~CapturedTraceback() override;

  using visitproc = int (*)(void* self, void* arg);

--- a/torch/csrc/profiler/containers.h
+++ b/torch/csrc/profiler/containers.h
@ -7,7 +7,6 @@
 #include <forward_list>
 #include <new>
 #include <utility>
-#include <vector>

 #include <c10/macros/Macros.h>
 #include <c10/util/ArrayRef.h>
@ -49,7 +48,7 @@ class AppendOnlyList {
 public:
  using array_t = block_t<T, ChunkSize>;
  static_assert(
-      std::is_base_of<std::array<T, ChunkSize>, array_t>::value,
+      std::is_base_of_v<std::array<T, ChunkSize>, array_t>,
      "AppendOnlyList expects raw low level pointer storage.");
  static_assert(ChunkSize > 0, "Block cannot be empty.");

--- a/torch/csrc/profiler/data_flow.cpp
+++ b/torch/csrc/profiler/data_flow.cpp
@ -10,6 +10,7 @@ namespace impl {
 namespace {
 static constexpr TensorImplAddress NoTensorImpl{nullptr};

+// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
 struct RawTensorInfo {
  TensorImplAddress impl_;
  StorageImplData storage_;
--- a/torch/csrc/profiler/events.h
+++ b/torch/csrc/profiler/events.h
@ -1,6 +1,7 @@
 #pragma once

 #include <array>
+#include <cstdint>
 #include <cstring>
 #include <vector>

--- a/torch/csrc/profiler/kineto_client_interface.cpp
+++ b/torch/csrc/profiler/kineto_client_interface.cpp
@ -1,7 +1,6 @@
 #ifdef USE_KINETO
 #include <libkineto.h>
 #include <torch/csrc/autograd/profiler_kineto.h>
-#include <cstdlib>

 // Ondemand tracing is not supported on Apple or edge platform
 #if defined(__APPLE__) || defined(EDGE_PROFILER_USE_KINETO)
--- a/torch/csrc/profiler/kineto_shim.cpp
+++ b/torch/csrc/profiler/kineto_shim.cpp
@ -1,8 +1,6 @@
 #include <torch/csrc/profiler/collection.h>
 #include <torch/csrc/profiler/kineto_shim.h>

-#include <type_traits>
-
 #ifdef USE_KINETO
 #include <libkineto.h>
 #endif
--- a/torch/csrc/profiler/orchestration/observer.cpp
+++ b/torch/csrc/profiler/orchestration/observer.cpp
@ -94,8 +94,8 @@ ProfilerConfig ProfilerConfig::fromIValue(
 // ----------------------------------------------------------------------------
 // -- Profiler base class -----------------------------------------------------
 // ----------------------------------------------------------------------------
-/*explicit*/ ProfilerStateBase::ProfilerStateBase(const ProfilerConfig& config)
-    : c10::MemoryReportingInfoBase(), config_(config) {}
+/*explicit*/ ProfilerStateBase::ProfilerStateBase(ProfilerConfig config)
+    : c10::MemoryReportingInfoBase(), config_(std::move(config)) {}

 ProfilerStateBase::~ProfilerStateBase() {
  if (handle_) {
--- a/torch/csrc/profiler/orchestration/observer.h
+++ b/torch/csrc/profiler/orchestration/observer.h
@ -108,7 +108,7 @@ struct TORCH_API ProfilerConfig {
 // -- Profiler base class -----------------------------------------------------
 // ----------------------------------------------------------------------------
 struct TORCH_API ProfilerStateBase : public c10::MemoryReportingInfoBase {
-  explicit ProfilerStateBase(const ProfilerConfig& config);
+  explicit ProfilerStateBase(ProfilerConfig config);
  ~ProfilerStateBase() override;

  static ProfilerStateBase* get(bool global);
--- a/torch/csrc/profiler/python/combined_traceback.cpp
+++ b/torch/csrc/profiler/python/combined_traceback.cpp
@ -2,8 +2,6 @@
 #include <torch/csrc/python_headers.h>
 #include <torch/csrc/utils/pybind.h>
 #include <torch/csrc/utils/pythoncapi_compat.h>
-#include <iostream>
-
 namespace py = pybind11;

 namespace torch {
--- a/torch/csrc/profiler/python/init.cpp
+++ b/torch/csrc/profiler/python/init.cpp
@ -56,6 +56,7 @@ PyTypeObject THPCapturedTracebackType = {
    nullptr, /* tp_getattro */
    nullptr, /* tp_setattro */
    nullptr, /* tp_as_buffer */
+    // NOLINTNEXTLINE(misc-redundant-expression)
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    nullptr, /* tp_doc */
    (traverseproc)THPCapturedTraceback_traverse, /* tp_traverse */
--- a/torch/csrc/profiler/standalone/execution_trace_observer.cpp
+++ b/torch/csrc/profiler/standalone/execution_trace_observer.cpp
@ -10,7 +10,6 @@
 #endif // _WIN32

 #include <fmt/format.h>
-#include <algorithm>
 #include <chrono>
 #include <cmath>
 #include <fstream>
@ -19,7 +18,6 @@
 #include <mutex>
 #include <sstream>
 #include <stack>
-#include <stdexcept>
 #include <vector>

 #include <ATen/core/TensorBody.h>
--- a/torch/csrc/profiler/unwind/unwind.cpp
+++ b/torch/csrc/profiler/unwind/unwind.cpp
@ -59,6 +59,7 @@ struct UpgradeExclusive {
  }

 private:
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
  std::shared_lock<std::shared_timed_mutex>& rdlock_;
 };

@ -97,6 +98,7 @@ struct LibraryInfo {
 };

 static const char* process_name() {
+  // NOLINTNEXTLINE(*-c-arrays*)
  static char name[PATH_MAX + 1] = "";
  if (*name == '\0') {
    ssize_t len = readlink("/proc/self/exe", name, PATH_MAX);
@ -145,6 +147,7 @@ struct UnwindCache {
                library_name = process_name();
              }
              auto eh_frame_hdr =
+                  // NOLINTNEXTLINE(performance-no-int-to-ptr)
                  (void*)(segments[i].p_vaddr + info->dlpi_addr);
              self->all_libraries_.emplace_back(
                  std::move(library_name),
@ -272,6 +275,7 @@ extern "C" void unwind_c(std::vector<void*>* result, int64_t rsp, int64_t rbp);
 extern "C" void unwind_c(std::vector<void*>* result, int64_t rsp, int64_t rbp) {
  std::shared_lock lock(cache_mutex_);
  UnwindState state{};
+  // NOLINTNEXTLINE(performance-no-int-to-ptr)
  state.rip = *(int64_t*)(rsp);
  // +8 because we saved rsp after the return address was already pushed
  // to the stack
@ -279,6 +283,7 @@ extern "C" void unwind_c(std::vector<void*>* result, int64_t rsp, int64_t rbp) {
  state.rbp = rbp;
  unwind_cache.checkRefresh(lock);
  while (true) { // unwind for _start sets rip as being undefined
+    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    result->push_back((void*)state.rip);
    const Unwinder& uw = unwind_cache.unwinderFor(state.rip, lock);
    if (uw.terminator()) {
@ -355,6 +360,7 @@ struct Symbolizer {
    auto& entry = getOrCreate(maybe_library->first);
    entry.queried.push_back(addr);
    auto libaddress = maybe_library->second - 1;
+    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    entry.comm->out() << (void*)libaddress << "\n";
    // we need to make sure we don't write more than 64k bytes to
    // a pipe before reading the results. Otherwise the buffer may
@ -393,6 +399,7 @@ struct Symbolizer {
  Entry& getOrCreate(const std::string& name) {
    auto it = entries_.find(name);
    if (it == entries_.end()) {
+      // NOLINTNEXTLINE(*-c-arrays*)
      const char* args[] = {
          "addr2line", "-C", "-f", "-e", name.c_str(), nullptr};
      it = entries_
--- a/torch/csrc/profiler/unwind/unwinder.h
+++ b/torch/csrc/profiler/unwind/unwinder.h
@ -1,7 +1,7 @@
 #pragma once
-#include <stdint.h>
 #include <torch/csrc/profiler/unwind/action.h>
 #include <torch/csrc/profiler/unwind/unwind_error.h>
+#include <cstdint>
 #include <limits>

 struct UnwindState {
@ -54,10 +54,13 @@ struct Unwinder {
    r.rsp = (reg_ == D_RSP ? cur.rsp : cur.rbp) + off_;
    r.rbp = rbp_off_ == std::numeric_limits<int64_t>::max()
        ? cur.rbp
+        // NOLINTNEXTLINE(performance-no-int-to-ptr)
        : *(int64_t*)(r.rsp + rbp_off_);
    if (deref_) {
+      // NOLINTNEXTLINE(performance-no-int-to-ptr)
      r.rsp = *(int64_t*)r.rsp;
    }
+    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    r.rip = *(int64_t*)(r.rsp + rip_off_);

    return r;
@ -70,5 +73,5 @@ struct Unwinder {
  int64_t off_;
  int64_t rip_off_;
  int64_t rbp_off_;
-  bool deref_;
+  bool deref_{false};
 };