mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary: Another step forward in fixing https://github.com/pytorch/pytorch/issues/62359 Disclaimer: this only works with GHA for now, as circleci would require changes in probot. Test plan can be seen a previous description where I modified the description to include linked issues. I've removed them now since the actual PR doesn't fix any of them. It works! In the [periodic 11.3 test1](https://github.com/pytorch/pytorch/pull/62851/checks?check_run_id=3263109970), we get this in the logs and we see that PYTORCH_IGNORE_DISABLED_ISSUES is properly set: ``` test_jit_cuda_extension (__main__.TestCppExtensionJIT) ... Using /var/lib/jenkins/.cache/torch_extensions/py36_cu113 as PyTorch extensions root... Creating extension directory /var/lib/jenkins/.cache/torch_extensions/py36_cu113/torch_test_cuda_extension... Detected CUDA files, patching ldflags Emitting ninja build file /var/lib/jenkins/.cache/torch_extensions/py36_cu113/torch_test_cuda_extension/build.ninja... Building extension module torch_test_cuda_extension... Using envvar MAX_JOBS (30) as the number of workers... 
[1/3] c++ -MMD -MF cuda_extension.o.d -DTORCH_EXTENSION_NAME=torch_test_cuda_extension -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11 (d55b25a633)_COMPILER_TYPE=\"_gcc\" -DPYBIND11 (d55b25a633)_STDLIB=\"_libstdcpp\" -DPYBIND11 (d55b25a633)_BUILD_ABI=\"_cxxabi1011\" -isystem /opt/conda/lib/python3.6/site-packages/torch/include -isystem /opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include -isystem /opt/conda/lib/python3.6/site-packages/torch/include/TH -isystem /opt/conda/lib/python3.6/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /opt/conda/include/python3.6m -D_GLIBCXX_USE_CXX11_ABI=1 -fPIC -std=c++14 -c /var/lib/jenkins/workspace/test/cpp_extensions/cuda_extension.cpp -o cuda_extension.o [2/3] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=torch_test_cuda_extension -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11 (d55b25a633)_COMPILER_TYPE=\"_gcc\" -DPYBIND11 (d55b25a633)_STDLIB=\"_libstdcpp\" -DPYBIND11 (d55b25a633)_BUILD_ABI=\"_cxxabi1011\" -isystem /opt/conda/lib/python3.6/site-packages/torch/include -isystem /opt/conda/lib/python3.6/site-packages/torch/include/torch/csrc/api/include -isystem /opt/conda/lib/python3.6/site-packages/torch/include/TH -isystem /opt/conda/lib/python3.6/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /opt/conda/include/python3.6m -D_GLIBCXX_USE_CXX11_ABI=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=compute_52 -gencode=arch=compute_52,code=sm_52 --compiler-options '-fPIC' -O2 -std=c++14 -c /var/lib/jenkins/workspace/test/cpp_extensions/cuda_extension.cu -o cuda_extension.cuda.o nvcc warning : The 'compute_35', 'compute_37', 'compute_50', 'sm_35', 'sm_37' and 'sm_50' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning). 
[3/3] c++ cuda_extension.o cuda_extension.cuda.o -shared -L/opt/conda/lib/python3.6/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o torch_test_cuda_extension.so Loading extension module torch_test_cuda_extension... ok (26.161s) ``` whereas on the latest master periodic 11.1 windows [test](https://github.com/pytorch/pytorch/runs/3263762478?check_suite_focus=true), we see ``` test_jit_cuda_extension (__main__.TestCppExtensionJIT) ... skip (0.000s) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/62851 Reviewed By: walterddr, tktrungna Differential Revision: D30192029 Pulled By: janeyx99 fbshipit-source-id: fd2ecc59d2b2bb5c31522a630dd805070d59f584
85 lines
3.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""Generates a matrix to be utilized through github actions
|
|
|
|
Will output a matrix to represent our testing configurations, which is currently
|
|
dictated by just sharding.
|
|
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import Dict
|
|
|
|
from typing_extensions import TypedDict
|
|
|
|
|
|
# One test configuration: how many shards the test suite is split into
# and which runner label the shards execute on.
Config = TypedDict('Config', {'num_shards': int, 'runner': str})
|
|
|
|
|
|
def get_disabled_issues() -> str:
    """Return a comma-separated string of issue numbers linked by the PR body.

    Reads the pull request description from the PR_BODY environment variable
    and collects every issue number referenced through a GitHub closing
    keyword (e.g. "Fixes #62851"). Returns '' when PR_BODY is unset or
    contains no such reference.
    """
    pr_body = os.getenv('PR_BODY', '')
    # The below regex is meant to match all *case-insensitive* keywords that
    # GitHub has delineated would link PRs to issues, more details here:
    # https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue.
    # E.g., "Close #62851", "fixES #62851" and "RESOLVED #62851" would all match, but not
    # "closes  #62851" --> extra space, "fixing #62851" --> not a keyword, nor "fix 62851" --> no #
    # The leading \b rejects words that merely END in a keyword, e.g.
    # "prefixes #62851" must not link issue 62851.
    regex = r'(?i)\b(Close(d|s)?|Resolve(d|s)?|Fix(ed|es)?) #([0-9]+)'
    # Group 5 of the pattern captures the issue number itself.
    issue_numbers = [x[4] for x in re.findall(regex, pr_body)]
    return ','.join(issue_numbers)
|
|
|
|
|
|
def main() -> None:
    """Print the GitHub Actions test matrix and related step outputs.

    Builds the matrix from environment variables: sharded 'default' entries
    plus any opt-in special configs (jit_legacy, multigpu, nogpu_NO_AVX,
    nogpu_NO_AVX2, slow), then emits 'matrix', 'render-matrix' and
    'ignore-disabled-issues' via the ::set-output workflow command.
    """
    runner_type = os.getenv('TEST_RUNNER_TYPE')
    assert runner_type is not None

    # On pull requests, a dedicated shard count may override the default.
    num_shards = int(os.getenv('NUM_TEST_SHARDS', '1'))
    pr_shards = os.getenv('NUM_TEST_SHARDS_ON_PULL_REQUEST')
    if os.getenv('GITHUB_HEAD_REF') and pr_shards:
        num_shards = int(pr_shards)

    multigpu_runner = os.getenv('MULTIGPU_RUNNER_TYPE')
    nogpu_runner = os.getenv('NOGPU_RUNNER_TYPE')

    # Special configurations are opt-in via ENABLE_* env vars; each one runs
    # as a single shard. Insertion order here determines matrix order.
    configs: Dict[str, Config] = {}
    if os.getenv('ENABLE_JIT_LEGACY_TEST'):
        configs['jit_legacy'] = {'num_shards': 1, 'runner': runner_type}
    if multigpu_runner is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
        configs['multigpu'] = {'num_shards': 1, 'runner': multigpu_runner}
    if nogpu_runner is not None and os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
        configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': nogpu_runner}
    if nogpu_runner is not None and os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
        configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': nogpu_runner}
    if os.getenv('ENABLE_SLOW_TEST'):
        configs['slow'] = {'num_shards': 1, 'runner': runner_type}

    # Expand every configuration into one matrix entry per shard, 'default'
    # entries first, then the special configs in insertion order.
    entries = []
    for shard in range(1, num_shards + 1):
        entries.append({
            'config': 'default',
            'shard': shard,
            'num_shards': num_shards,
            'runner': runner_type,
        })
    for name, config in configs.items():
        for shard in range(1, config['num_shards'] + 1):
            entries.append({
                'config': name,
                'shard': shard,
                'num_shards': config['num_shards'],
                'runner': config['runner'],
            })
    matrix = {'include': entries}

    # Deduplicated config names, first-seen order preserved, for rendering.
    config_names = []
    for entry in entries:
        if entry['config'] not in config_names:
            config_names.append(entry['config'])
    render_matrix = {'config': config_names}

    print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))
    print(f'::set-output name=matrix::{json.dumps(matrix)}')
    print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')
    print(f'::set-output name=ignore-disabled-issues::{get_disabled_issues()}')
|
|
|
|
|
|
# Script entry point: emit the matrix only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|