mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: This PR adds scripts that could be used for https://github.com/pytorch/pytorch/issues/26052 Example output: ``` Success: TestTorchDeviceTypeCPU.test_advancedindex_big_cpu Success: TestTorchDeviceTypeCPU.test_addcmul_cpu Success: TestTorchDeviceTypeCPU.test_addbmm_cpu_float32 Success: TestTorchDeviceTypeCPU.test_advancedindex_cpu_float16 Success: TestTorchDeviceTypeCPU.test_addmv_cpu Success: TestTorchDeviceTypeCPU.test_addcdiv_cpu Success: TestTorchDeviceTypeCPU.test_all_any_empty_cpu Success: TestTorchDeviceTypeCPU.test_atan2_cpu Success: TestTorchDeviceTypeCPU.test_advancedindex_cpu_float64 Success: TestTorchDeviceTypeCPU.test_baddbmm_cpu_float32 Success: TestTorchDeviceTypeCPU.test_atan2_edgecases_cpu Success: TestTorchDeviceTypeCPU.test_add_cpu Success: TestTorchDeviceTypeCPU.test_addr_cpu_bfloat16 Success: TestTorchDeviceTypeCPU.test_addr_cpu_float32 ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/28127 Differential Revision: D18184255 Pulled By: mruberry fbshipit-source-id: 7fd4bd9faf9f8b37b369f631c63f26eb965b16e7
99 lines
4.2 KiB
Python
99 lines
4.2 KiB
Python
# This file contains a simple parser that parses reports
# from cuda-memcheck.
|
|
|
|
class ParseError(Exception):
    """Raised whenever the simple parser is unable to parse a cuda-memcheck report."""
|
|
|
|
|
|
class Report:
    """Container for the errors of one report together with its summary line.

    Attributes:
        text: The summary line exactly as passed in, e.g.
            ``ERROR SUMMARY: 4 errors`` or ``ERROR SUMMARY: 1 error``.
        num_errors: The error count read from ``text``.
        errors: The collection of errors handed in by the caller.
    """

    # Fixed text surrounding the count in a plural summary line. TAIL is no
    # longer used for slicing (it does not fit the singular "1 error"), but
    # both stay available for code that refers to them.
    HEAD = 'ERROR SUMMARY: '
    TAIL = ' errors'

    def __init__(self, text, errors):
        """Build a report from its summary line and the already-parsed errors.

        Args:
            text: The summary line with the leading ``=========`` marker
                removed; it is expected to begin with ``HEAD``.
            errors: A sized collection of the errors found in the report.

        Raises:
            ParseError: If no integer count follows ``HEAD`` in ``text``, or
                if the count differs from ``len(errors)``.
        """
        self.text = text
        # Read the first whitespace-delimited token after HEAD instead of
        # slicing off a fixed-width tail, so that both "1 error" and
        # "N errors" are accepted.
        fields = text[len(self.HEAD):].split()
        try:
            self.num_errors = int(fields[0])
        except (IndexError, ValueError):
            raise ParseError(
                "Unable to read the number of errors from {!r}".format(text))
        self.errors = errors
        if len(errors) != self.num_errors:
            raise ParseError("Number of errors does not match")
|
|
|
|
|
|
class Error:
    """One error section taken from the output of cuda-memcheck.

    A section consists of an error message followed by a backtrace, e.g.:

    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaGetLastError + 0x163) [0x4c493]
    ========= Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x5b77a05]
    ========= Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x39d6d1d]
    ========= .....

    Attributes:
        message: The first line of the section.
        stack: Every line from the third one onwards, with surrounding
            whitespace stripped.
    """

    def __init__(self, lines):
        """Split the lines of one section into a message and a stack.

        Args:
            lines: The lines of the section; the first is the error message.
                An empty sequence raises IndexError.
        """
        # The second line is skipped; in the sample above it is the
        # "Saved host backtrace ..." header that precedes the frames.
        frames = lines[2:]
        self.message = lines[0]
        self.stack = [frame.strip() for frame in frames]
|
|
|
|
|
|
def parse(message):
    """Parse the report printed by cuda-memcheck.

    The parser is deliberately simple: it only splits the report into
    separate errors and a summary, and each error is further split into an
    error message and a backtrace. No further details are parsed.

    A report contains multiple errors and a summary of how many errors were
    detected. It looks like:

    ========= CUDA-MEMCHECK
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaPointerGetAttributes.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaPointerGetAttributes + 0x1a9) [0x428b9]
    ========= Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x5b778a9]
    ========= .....
    =========
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaGetLastError + 0x163) [0x4c493]
    ========= .....
    =========
    ========= .....
    =========
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= .....
    ========= Host Frame:python (_PyEval_EvalFrameDefault + 0x6a0) [0x1d0ad0]
    ========= Host Frame:python (_PyEval_EvalCodeWithName + 0xbb9) [0x116db9]
    =========
    ========= ERROR SUMMARY: 4 errors

    Args:
        message: The full text containing the report.

    Returns:
        A Report built from the summary line and the errors collected
        before it.

    Raises:
        ParseError: If no ``ERROR SUMMARY:`` line follows the
            ``CUDA-MEMCHECK`` banner. Constructing the Report may also
            raise when the summary cannot be reconciled with the errors.
    """
    prefix = '========='
    banner = prefix + ' CUDA-MEMCHECK'
    # Number of characters to drop: the marker plus the one character after it.
    skip = len(prefix) + 1

    collected = []
    pending = None  # lines of the error being gathered; None between errors
    seen_banner = False

    for raw in message.splitlines():
        if raw == banner:
            seen_banner = True
            continue
        # Ignore everything before the banner and any unmarked line.
        if not (seen_banner and raw.startswith(prefix)):
            continue

        content = raw[skip:]
        if content.startswith('ERROR SUMMARY:'):
            # An error still being gathered at this point is not added.
            return Report(content, collected)

        if pending is None:
            # First marked line after the banner or after a separator
            # opens a new error.
            pending = [content]
        elif content == '':
            # A marker-only line closes the current error.
            collected.append(Error(pending))
            pending = None
        else:
            pending.append(content)

    raise ParseError("No error summary found")
|