Revert "Add cuda-11.3+clang9 build workflow"

This reverts commit 709fcc862e.

Reverted https://github.com/pytorch/pytorch/pull/75293 on behalf of https://github.com/janeyx99
PyTorch MergeBot 2022-04-11 15:24:59 +00:00
parent 1a85699c03
commit 8fe43d76d5
11 changed files with 11 additions and 36 deletions

View File

@@ -134,18 +134,6 @@ case "$image" in
VISION=yes
KATEX=yes
;;
- pytorch-linux-bionic-cuda11.3-cudnn8-py3-clang9)
- CUDA_VERSION=11.3.0 # Deviating from major.minor to conform to nvidia's Docker image names
- CUDNN_VERSION=8
- TENSORRT_VERSION=8.0.1.6
- ANACONDA_PYTHON_VERSION=3.7
- CMAKE_VERSION=3.10.3
- CLANG_VERSION=9
- PROTOBUF=yes
- DB=yes
- VISION=yes
- KATEX=yes
- ;;
pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7)
CUDA_VERSION=11.5.0
CUDNN_VERSION=8

View File

@@ -25,7 +25,6 @@ jobs:
matrix:
include:
- docker-image-name: pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7
- - docker-image-name: pytorch-linux-bionic-cuda11.3-cudnn8-py3-clang9
- docker-image-name: pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-py3.7-clang9
- docker-image-name: pytorch-linux-bionic-rocm4.5-py3.7

View File

@@ -135,13 +135,6 @@ jobs:
{ config: "noarch", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
]}
- linux-bionic-cuda11_3-py3_7-clang9-build:
- name: linux-bionic-cuda11.3-py3.7-clang9
- uses: ./.github/workflows/_linux-build.yml
- with:
- build-environment: linux-bionic-cuda11.3-py3.7-clang9
- docker-image-name: pytorch-linux-bionic-cuda11.3-cudnn8-py3-clang9
linux-vulkan-bionic-py3_7-clang9-build:
name: linux-vulkan-bionic-py3.7-clang9
uses: ./.github/workflows/_linux-build.yml

View File

@@ -79,7 +79,7 @@ void binomial_cuda_kernel(
using accscalar_t = at::acc_type<scalar_t, true>;
at::native::distribution_binary_kernel(iter, philox_args,
- [] GPU_LAMBDA (curandStatePhilox4_32_10_t& state, scalar_t count, scalar_t prob) {
+ [philox_args] GPU_LAMBDA (curandStatePhilox4_32_10_t& state, scalar_t count, scalar_t prob) {
#if defined(__CUDA_ARCH__) || defined(USE_ROCM)
auto uniform_lambda = curand_uniform_wrapper(state);
BaseSampler<accscalar_t, decltype(uniform_lambda)> standard_uniform(uniform_lambda);
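
The only change in this hunk is the lambda's capture list. For background, -Wunused-lambda-capture is a clang-specific warning that fires when an explicitly captured variable is never referenced in the lambda body, which is presumably what the cuda11.3+clang9 build tripped over here. A minimal, hypothetical sketch of the warning, not code from Distributions.cu:

    // sketch.cpp -- hypothetical example; build with: clang++ -std=c++17 -Wunused-lambda-capture -c sketch.cpp
    #include <cstdio>

    int main() {
      int philox_seed = 42;                  // stand-in name, not the real philox_args
      auto with_capture = [philox_seed] {    // warning: lambda capture 'philox_seed' is not used
        return 1;
      };
      auto without_capture = [] {            // empty capture list: no warning
        return 1;
      };
      std::printf("%d %d\n", with_capture(), without_capture());
    }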

View File

@@ -90,9 +90,9 @@ static void Baseline_LayerNorm(
std::vector<int64_t> input_shape{
benchmark_state.range(0), benchmark_state.range(1)};
- const size_t kReductionAxis = 1;
+ const int kReductionAxis = 1;
std::vector<int64_t> norm_shape;
- for (auto idx = kReductionAxis; idx < input_shape.size(); ++idx) {
+ for (int idx = kReductionAxis; idx < input_shape.size(); ++idx) {
norm_shape.push_back(input_shape[idx]);
}

View File

@@ -115,9 +115,9 @@ static void Baseline_LayerNorm_BWD(
std::vector<int64_t> input_shape{
benchmark_state.range(0), benchmark_state.range(1)};
- const size_t kReductionAxis = 1;
+ const int kReductionAxis = 1;
std::vector<int64_t> norm_shape;
- for (auto idx = kReductionAxis; idx < input_shape.size(); ++idx) {
+ for (int idx = kReductionAxis; idx < input_shape.size(); ++idx) {
norm_shape.push_back(input_shape[idx]);
}
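
The two LayerNorm benchmark hunks above are the same change twice: the reduction-axis index goes back from size_t to int. Comparing a signed int index against std::vector::size(), which returns an unsigned size_t, triggers -Wsign-compare; the size_t/auto variant that this revert removes presumably existed to silence it. A self-contained sketch of the difference, using a hypothetical file rather than the benchmark itself:

    // sign_compare.cpp -- hypothetical example; build with: clang++ -std=c++17 -Wsign-compare sign_compare.cpp
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<int64_t> input_shape{128, 1024};
      std::vector<int64_t> norm_shape;

      const int kReductionAxis = 1;
      for (int idx = kReductionAxis; idx < input_shape.size(); ++idx) {    // warning: comparison of integers of different signs
        norm_shape.push_back(input_shape[idx]);
      }

      for (size_t idx = kReductionAxis; idx < input_shape.size(); ++idx) { // unsigned index: no warning
        norm_shape.push_back(input_shape[idx]);
      }
      return 0;
    }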

View File

@@ -38,12 +38,6 @@ endif()
# Enable CUDA language support
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
- # Pass clang as host compiler, which according to the docs
- # must be done before the CUDA language is enabled, see
- # https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
- if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
- set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}")
- endif()
enable_language(CUDA)
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

View File

@@ -470,7 +470,8 @@ function(torch_compile_options libname)
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
list(APPEND private_compile_options
-Wno-range-loop-analysis)
- else()
+ endif()
+ if(NOT APPLE)
list(APPEND private_compile_options
# Considered to be flaky. See the discussion at
# https://github.com/pytorch/pytorch/pull/9608

View File

@@ -508,6 +508,7 @@ void testReduceScatter(const std::string& path, int rank, int size) {
void testProcessGroupNCCLHealthCheckFailHelper(const std::string& path, bool timeout) {
// simulate world_size > 1 here via threads.
const int worldSize = 4;
+ std::mutex m;
std::unordered_set<uint64_t> nums;
auto runTest = [&](int i) {
NCCLTest test(path, worldSize, std::chrono::milliseconds(3000));
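
For context on the std::mutex restored here: the helper simulates world_size ranks with one thread each (runTest), and the visible context suggests those threads record values into the shared unordered_set nums, which needs a lock. A generic sketch of that pattern with invented details, not the ProcessGroupNCCL test itself:

    // threads_set.cpp -- hypothetical example; build with: clang++ -std=c++17 -pthread threads_set.cpp
    #include <cstdint>
    #include <cstdio>
    #include <mutex>
    #include <thread>
    #include <unordered_set>
    #include <vector>

    int main() {
      const int worldSize = 4;
      std::mutex m;                           // guards `nums` across worker threads
      std::unordered_set<uint64_t> nums;

      auto runTest = [&](int i) {
        // ... per-rank work would go here ...
        std::lock_guard<std::mutex> lock(m);  // serialize writes to the shared set
        nums.insert(static_cast<uint64_t>(i));
      };

      std::vector<std::thread> threads;
      for (int i = 0; i < worldSize; ++i) {
        threads.emplace_back(runTest, i);
      }
      for (auto& t : threads) {
        t.join();
      }
      std::printf("collected %zu ranks\n", nums.size());
      return 0;
    }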

View File

@@ -143,7 +143,6 @@ if(USE_CUDA)
${TORCH_CUDA_LIBRARIES})
target_compile_definitions(test_jit PRIVATE USE_CUDA)
- target_compile_options(test_jit PRIVATE -Wno-sign-compare)
elseif(USE_ROCM)
target_link_libraries(test_jit PRIVATE
${ROCM_HIPRTC_LIB}

View File

@@ -697,7 +697,7 @@ c10::intrusive_ptr<ProcessGroup::Work> ProcessGroupMPI::alltoall_base(
"Tensor's dim 0 does not divide equally across group size");
std::function<void(std::unique_ptr<WorkEntry>&)> runFunc =
- [this](std::unique_ptr<WorkEntry>& entry) {
+ [opts, this](std::unique_ptr<WorkEntry>& entry) {
auto srcdata = (entry->src)[0];
auto dstdata = (entry->dst)[0];
c10::DeviceGuard guard(srcdata.device());
@@ -724,7 +724,7 @@ c10::intrusive_ptr<ProcessGroup::Work> ProcessGroupMPI::alltoall_base(
c10d::checkSplitSizes(inputSplitSizes, inputTensor, size_);
c10d::checkSplitSizes(outputSplitSizes, outputTensor, size_);
std::function<void(std::unique_ptr<WorkEntry>&)> runFunc =
- [this, inputSplitSizes, outputSplitSizes](
+ [opts, this, inputSplitSizes, outputSplitSizes](
std::unique_ptr<WorkEntry>& entry) {
auto srcdata = (entry->src)[0];
auto dstdata = (entry->dst)[0];
@@ -771,7 +771,7 @@ c10::intrusive_ptr<ProcessGroup::Work> ProcessGroupMPI::alltoall(
outputTensors.size() == size_,
"Number of output tensors are not equal to group size");
std::function<void(std::unique_ptr<WorkEntry>&)> runFunc =
- [this](std::unique_ptr<WorkEntry>& entry) {
+ [opts, this](std::unique_ptr<WorkEntry>& entry) {
std::vector<int> send_lengths(size_);
std::vector<int> recv_lengths(size_);
std::vector<int> send_offsets(size_);
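
A closing note on the three ProcessGroupMPI hunks: each runFunc is a std::function that ProcessGroupMPI stores in a WorkEntry and runs later on its worker thread, so the data the callback needs (opts, the split-size vectors) is captured by value; capturing those locals by reference would leave dangling references by the time the work executes. A stripped-down sketch of that deferred-execution pattern, using invented types rather than the c10d ones:

    // deferred_work.cpp -- hypothetical example; build with: clang++ -std=c++17 deferred_work.cpp
    #include <cstdio>
    #include <functional>
    #include <queue>
    #include <vector>

    struct WorkEntry {
      std::vector<int> src;
      std::vector<int> dst;
    };

    int main() {
      std::queue<std::function<void(WorkEntry&)>> workQueue;

      {
        std::vector<int> inputSplitSizes{2, 2};  // local to the enqueueing scope
        // Capture by value: the lambda outlives this scope, so capturing
        // inputSplitSizes by reference would dangle once the scope ends.
        workQueue.push([inputSplitSizes](WorkEntry& entry) {
          for (size_t i = 0; i < inputSplitSizes.size(); ++i) {
            entry.dst.push_back(entry.src[i] * inputSplitSizes[i]);
          }
        });
      }  // inputSplitSizes destroyed here

      WorkEntry entry{{10, 20}, {}};
      workQueue.front()(entry);                  // runs later, after the enqueueing scope ended
      std::printf("dst[0]=%d dst[1]=%d\n", entry.dst[0], entry.dst[1]);
      return 0;
    }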