mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
According to the [doc](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g0907ca7a1e7d0211b71ee49c5403072b): > enum cudaGraphDebugDotFlags > CUDA Graph debug write options > > Values > cudaGraphDebugDotFlagsVerbose = 1<<0 > Output all debug data as if every debug flag is enabled > cudaGraphDebugDotFlagsKernelNodeParams = 1<<2 > Adds cudaKernelNodeParams to output > cudaGraphDebugDotFlagsMemcpyNodeParams = 1<<3 > Adds cudaMemcpy3DParms to output > cudaGraphDebugDotFlagsMemsetNodeParams = 1<<4 > Adds cudaMemsetParams to output > cudaGraphDebugDotFlagsHostNodeParams = 1<<5 > Adds cudaHostNodeParams to output > cudaGraphDebugDotFlagsEventNodeParams = 1<<6 > Adds cudaEvent_t handle from record and wait nodes to output > cudaGraphDebugDotFlagsExtSemasSignalNodeParams = 1<<7 > Adds cudaExternalSemaphoreSignalNodeParams values to output > cudaGraphDebugDotFlagsExtSemasWaitNodeParams = 1<<8 > Adds cudaExternalSemaphoreWaitNodeParams to output > cudaGraphDebugDotFlagsKernelNodeAttributes = 1<<9 > Adds cudaKernelNodeAttrID values to output > cudaGraphDebugDotFlagsHandles = 1<<10 > Adds node handles and every kernel function handle to output > cudaGraphDebugDotFlagsConditionalNodeParams = 1<<15 > Adds cudaConditionalNodeParams to output > `1 << 10` is not the most verbose flag. it is just one flag to add node handles and every kernel function handle to output. `1 << 0` is the most verbose flag, under the name `cudaGraphDebugDotFlagsVerbose`. 
Here is an example of graph, dumped with `1 << 10`: ```dot digraph dot { subgraph cluster_1 { label="graph_1" graph[style="dashed"]; "graph_1_node_0"[style="solid" shape="rectangle" label="0 MEM_ALLOC node handle: 0x000055D2889750F0 "]; "graph_1_node_1"[style="bold" shape="octagon" label="1 _Z3addPhS_S_m node handle: 0x000055D288979A20 func handle: 0x000055D288978D40 "]; "graph_1_node_2"[style="solid" shape="trapezium"label="2 MEMCPY node handle: 0x000055D28897A130 (DtoH,1024) "]; "graph_1_node_3"[style="solid" shape="rectangle" label="3 MEM_FREE node handle: 0x000055D2889890C0 "]; "graph_1_node_0" -> "graph_1_node_1"; "graph_1_node_1" -> "graph_1_node_2"; "graph_1_node_2" -> "graph_1_node_3"; } } ``` The same graph dumped with `1 << 0`: ```dot digraph dot { subgraph cluster_1 { label="graph_1" graph[style="dashed"]; "graph_1_node_0"[style="solid" shape="record" label="{ MEM_ALLOC | {{ID | node handle} | {0 (topoId: 3) | 0x000055D2889750F0}} | {{{poolProps | {allocType | handleTypes | {location | {type | id}}} | {PINNED | NONE | DEVICE | 0}}}} | {{bytesize | dptr} | {1024 | 0x0000000A02000000}} }"]; "graph_1_node_1"[style="bold" shape="record" label="{KERNEL | {ID | 1 (topoId: 2) | _Z3addPhS_S_m\<\<\<4,256,0\>\>\>} | {{node handle | func handle} | {0x000055D288979A20 | 0x000055D288978D40}} | {accessPolicyWindow | {base_ptr | num_bytes | hitRatio | hitProp | missProp} | {0x0000000000000000 | 0 | 0.000000 | N | N}} | {cooperative | 0} | {priority | 0} }"]; "graph_1_node_2"[style="solid" shape="record" label="{ MEMCPY | {{ID | node handle} | {2 (topoId: 1) | 0x000055D28897A130}} | {kind | DtoH (DEVICE to HOST PAGEABLE)} | {{srcPtr | dstPtr} | {pitch | ptr | xsize | ysize | pitch | ptr | xsize | ysize} | {0 | 0x0000000A02000000 | 0 | 0 | 0 | 0x000055D287CA6DB0 | 0 | 0}} | {{srcPos | {{x | 0} | {y | 0} | {z | 0}}} | {dstPos | {{x | 0} | {y | 0} | {z | 0}}} | {Extent | {{Width | 1024} | {Height | 1} | {Depth | 1}}}} }"]; "graph_1_node_3"[style="solid" shape="record" 
label="{ MEM_FREE | {{ID | node handle} | {3 (topoId: 0) | 0x000055D2889890C0}} | {{dptr} | {0x0000000A02000000}} }"]; "graph_1_node_0" -> "graph_1_node_1" [headlabel=0]; "graph_1_node_1" -> "graph_1_node_2" [headlabel=0]; "graph_1_node_2" -> "graph_1_node_3" [headlabel=0]; } } ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/126694 Approved by: https://github.com/eqy, https://github.com/eellison
63 lines
1.1 KiB
Python
63 lines
1.1 KiB
Python
"""Constants for annotations in the mapping.
|
|
|
|
The constants defined here are used to annotate the mapping tuples in cuda_to_hip_mappings.py.
|
|
They are based on
|
|
https://github.com/ROCm/HIPIFY/blob/master/src/Statistics.h
|
|
and fall in three categories: 1) type of mapping, 2) API of mapping, 3) unsupported
|
|
mapping.
|
|
"""

# --- Category 1: type of mapping (CONV_*) ---------------------------------
# Sequential integer tags, 0 through 36. CONV_VERSION was previously written
# with a trailing comma ("0,"), which bound it to the tuple (0,) instead of
# the integer 0 and made it the only non-int constant in this module.
CONV_VERSION = 0
CONV_INIT = 1
CONV_DEVICE = 2
CONV_MEM = 3
CONV_KERN = 4
CONV_COORD_FUNC = 5
CONV_MATH_FUNC = 6
CONV_DEVICE_FUNC = 7
CONV_SPECIAL_FUNC = 8
CONV_STREAM = 9
CONV_EVENT = 10
CONV_OCCUPANCY = 11
CONV_CONTEXT = 12
CONV_PEER = 13
CONV_MODULE = 14
CONV_CACHE = 15
CONV_EXEC = 16
CONV_ERROR = 17
CONV_DEF = 18
CONV_TEX = 19
CONV_GL = 20
CONV_GRAPHICS = 21
CONV_SURFACE = 22
CONV_JIT = 23
CONV_D3D9 = 24
CONV_D3D10 = 25
CONV_D3D11 = 26
CONV_VDPAU = 27
CONV_EGL = 28
CONV_THREAD = 29
CONV_OTHER = 30
CONV_INCLUDE = 31
CONV_INCLUDE_CUDA_MAIN_H = 32
CONV_TYPE = 33
CONV_LITERAL = 34
CONV_NUMERIC_LITERAL = 35
CONV_LAST = 36

# --- Category 2: API of mapping (API_*) -----------------------------------
# Values continue the numbering from the CONV_* block above.
API_DRIVER = 37
API_RUNTIME = 38
API_BLAS = 39
API_SPECIAL = 40
API_RAND = 41
# NOTE(review): despite its name, API_LAST is not the highest API_* value;
# API_FFT, API_RTC and API_ROCTX follow it. The values are kept as-is so
# existing users of these constants are unaffected -- confirm before
# relying on API_LAST as an upper bound.
API_LAST = 42
API_FFT = 43
API_RTC = 44
API_ROCTX = 45

# --- Category 3: unsupported mapping --------------------------------------
HIP_UNSUPPORTED = 46

# Additional API tags. Their values (1337 and up) sit far outside the
# sequential range used above, so they cannot collide with it.
API_PYTORCH = 1337
API_CAFFE2 = 1338
API_C10 = 1339
API_ROCMSMI = 1340