mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
[Profiler] Use strong typedef for Tensor ID (#85718)
I want to add a Tensor ID to allocations (for allocs which are `StorageImpl`s). To keep things safe and organized I need to pull the ID type into a standalone entity, which makes it an ideal time to convert to a strong typedef. Differential Revision: [D39788872](https://our.internmc.facebook.com/intern/diff/D39788872/) Pull Request resolved: https://github.com/pytorch/pytorch/pull/85718 Approved by: https://github.com/chaekit
This commit is contained in:
parent
282d8dfa68
commit
f23f362c5d
|
|
@ -837,7 +837,7 @@ void calculate_unique_tensor_ids(std::vector<result_ptr_t>& sorted_results) {
|
|||
storage_id_t storage_id_;
|
||||
|
||||
// Used to assign the result.
|
||||
std::reference_wrapper<c10::optional<size_t>> id_ref_;
|
||||
std::reference_wrapper<c10::optional<TensorID>> id_ref_;
|
||||
};
|
||||
std::vector<TensorStoragePair> tensors;
|
||||
|
||||
|
|
@ -906,7 +906,7 @@ void calculate_unique_tensor_ids(std::vector<result_ptr_t>& sorted_results) {
|
|||
// Step 4) Write back to metadata
|
||||
// --------------------------------------------------------------------------
|
||||
for (const auto& t : tensors) {
|
||||
t.id_ref_.get() = id_map.at(t.storage_id_);
|
||||
t.id_ref_.get() = TensorID(id_map.at(t.storage_id_));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -54,6 +54,25 @@ using StorageImplData = strong::type<
|
|||
strong::hashable,
|
||||
strong::boolean>;
|
||||
|
||||
// Identity is a complex concept in PyTorch. A Tensor might not have
|
||||
// an associated storage, multiple Tensors might share the same underlying
|
||||
// storage, the storage of a Tensor might change over time, etc.
|
||||
//
|
||||
// For the purpose of profiling we're mostly interested in data flow
|
||||
// analysis. As a result, we can take an expansive view of identity:
|
||||
// Tensors share an ID if they share a TensorImpl or storage data.
|
||||
//
|
||||
// This identity equality is transitive; If Tensors T0 and T1 share a storage
|
||||
// S0 and T1 later points to a different storage S1 then all Tensors which
|
||||
// point to either S0 or S1 are considered to have the same identity. (Since
|
||||
// profiler cannot reason beyond that.)
|
||||
//
|
||||
// The profiler will handle lifetime analysis to ensure that identities do
|
||||
// not run afoul of the ABA problem. This does, however, mean that identities
|
||||
// can only be assigned when memory profiling is enabled. (And we cannot
|
||||
// handle ABA for TensorImpl as those allocations are not instrumented.)
|
||||
using TensorID = strong::type<size_t, struct TensorID_, strong::regular>;
|
||||
|
||||
struct RawTensorMetadata {
|
||||
TensorImplAddress impl_;
|
||||
StorageImplData data_;
|
||||
|
|
@ -75,24 +94,7 @@ struct TensorMetadata : public RawTensorMetadata {
|
|||
return {device_type_, device_index_};
|
||||
}
|
||||
|
||||
// Identity is a complex concept in PyTorch. A Tensor might not have
|
||||
// an associated storage, multiple Tensors might share the same underlying
|
||||
// storage, the storage of a Tensor might change over time, etc.
|
||||
//
|
||||
// For the purpose of profiling we're mostly interested in data flow
|
||||
// analysis. As a result, we can take an expansive view of identity:
|
||||
// Tensors share an ID if they share a TensorImpl or storage data.
|
||||
//
|
||||
// This identity equality is transitive; If Tensors T0 and T1 share a storage
|
||||
// S0 and T1 later points to a different storage S1 then all Tensors which
|
||||
// point to either S0 or S1 are considered to have the same identity. (Since
|
||||
// profiler cannot reason beyond that.)
|
||||
//
|
||||
// The profiler will handle lifetime analysis to ensure that identities do
|
||||
// not run afoul of the ABA problem. This does, however, mean that identities
|
||||
// can only be assigned when memory profiling is enabled. (And we cannot
|
||||
// handle ABA for TensorImpl as those allocations are not instrumented.)
|
||||
c10::optional<size_t> id_;
|
||||
c10::optional<TensorID> id_;
|
||||
};
|
||||
|
||||
struct Inputs {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
namespace pybind11 {
|
||||
namespace detail {
|
||||
using torch::profiler::impl::StorageImplData;
|
||||
using torch::profiler::impl::TensorID;
|
||||
using torch::profiler::impl::TensorImplAddress;
|
||||
|
||||
template <>
|
||||
|
|
@ -17,6 +18,9 @@ struct type_caster<StorageImplData>
|
|||
template <>
|
||||
struct type_caster<TensorImplAddress>
|
||||
: public strong_pointer_type_caster<TensorImplAddress> {};
|
||||
|
||||
template <>
|
||||
struct type_caster<TensorID> : public strong_uint_type_caster<TensorID> {};
|
||||
} // namespace detail
|
||||
} // namespace pybind11
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@
|
|||
namespace pybind11 {
|
||||
namespace detail {
|
||||
// Strong typedefs don't make much sense in Python since everything is duck
|
||||
// typed. So instead we simply cast them to ints, return them, and let the
|
||||
// caller handle correctness.
|
||||
// typed. So instead we simply extract the underlying value and let the caller
|
||||
// handle correctness.
|
||||
template <typename T>
|
||||
struct strong_pointer_type_caster {
|
||||
template <typename T_>
|
||||
|
|
@ -29,5 +29,22 @@ struct strong_pointer_type_caster {
|
|||
|
||||
PYBIND11_TYPE_CASTER(T, _("strong_pointer"));
|
||||
};
|
||||
|
||||
// pybind11 caster exposing a strong unsigned-integer typedef (e.g. TensorID)
// to Python as a plain int. Strong typedefs add nothing on the duck-typed
// Python side, so only the underlying value crosses the boundary.
template <typename T>
|
||||
struct strong_uint_type_caster {
|
||||
  // C++ -> Python: unwrap the strong typedef (`value_of()`) and pack the
  // underlying unsigned value into a Python int via THPUtils_packUInt64.
template <typename T_>
|
||||
static handle cast(
|
||||
T_&& src,
|
||||
return_value_policy /*policy*/,
|
||||
handle /*parent*/) {
|
||||
return handle(THPUtils_packUInt64(src.value_of()));
|
||||
}
|
||||
|
||||
  // Python -> C++ conversion is deliberately unsupported: always fails, so
  // Python code cannot fabricate a strong-typedef value.
bool load(handle /*src*/, bool /*convert*/) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PYBIND11_TYPE_CASTER(T, _("strong_uint"));
|
||||
};
|
||||
} // namespace detail
|
||||
} // namespace pybind11
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user