From 143b5f2745e277416ab861ea914dd5dbc901b573 Mon Sep 17 00:00:00 2001 From: lancerts Date: Thu, 15 Feb 2024 19:11:13 +0000 Subject: [PATCH] Fix the missing device in _memory_profiler (#119751) Fixes #119722, 1, added the missing device in ``` max_memory_allocated = torch.cuda.max_memory_allocated() max_memory_reserved = torch.cuda.max_memory_reserved() ``` 2, fixed the device parameter to device_str. Based on [lines](https://github.com/pytorch/pytorch/blob/2bda6b4cb80ea46c09ef8123dd0255a26e7d157f/torch/profiler/profiler.py#L291), the input device is a string (device_str) for ``` self.mem_tl.export_memory_timeline_html self.mem_tl.export_memory_timeline_raw self.mem_tl.export_memory_timeline ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/119751 Approved by: https://github.com/aaronenyeshi --- torch/profiler/_memory_profiler.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/torch/profiler/_memory_profiler.py b/torch/profiler/_memory_profiler.py index dcbbe151a64..ebafbb3fd36 100644 --- a/torch/profiler/_memory_profiler.py +++ b/torch/profiler/_memory_profiler.py @@ -1053,11 +1053,11 @@ class MemoryProfileTimeline: times = [t_min if t < 0 else t for t in times] return times, sizes - def export_memory_timeline(self, path, device) -> None: + def export_memory_timeline(self, path, device_str) -> None: """Saves the memory timeline as [times, sizes by category] as a JSON formatted file to the given path for the given device.""" - times, sizes = self._coalesce_timeline(device) + times, sizes = self._coalesce_timeline(device_str) # TODO: Write a faster serialize (orjson not available in CI) import json @@ -1131,7 +1131,7 @@ class MemoryProfileTimeline: json.dump(raw_events, f) def export_memory_timeline_html( - self, path, device, figsize=(20, 12), title=None + self, path, device_str, figsize=(20, 12), title=None ) -> None: """Exports the memory timeline as an HTML file which contains the memory timeline plot embedded as a 
PNG file.""" @@ -1152,14 +1152,15 @@ class MemoryProfileTimeline: import matplotlib.pyplot as plt import numpy as np - mt = self._coalesce_timeline(device) + mt = self._coalesce_timeline(device_str) times, sizes = np.array(mt[0]), np.array(mt[1]) # For this timeline, start at 0 to match Chrome traces. t_min = min(times) times -= t_min stacked = np.cumsum(sizes, axis=1) / 1024**3 - max_memory_allocated = torch.cuda.max_memory_allocated() - max_memory_reserved = torch.cuda.max_memory_reserved() + device = torch.device(device_str) + max_memory_allocated = torch.cuda.max_memory_allocated(device) + max_memory_reserved = torch.cuda.max_memory_reserved(device) # Plot memory timeline as stacked data fig = plt.figure(figsize=figsize, dpi=80)