[cuDNN][Quantization] Don't print when plan finalization fails in cuDNN quantization backend (#128177)

Similar in spirit to #125790; hopefully addresses the failures seen with the cuDNN 9.1 upgrade: https://github.com/pytorch/pytorch/pull/128166

CC @nWEIdia @atalman

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128177
Approved by: https://github.com/nWEIdia, https://github.com/Skylion007
This commit is contained in:
Eddie Yan 2024-06-11 18:09:25 +00:00 committed by PyTorch MergeBot
parent 8a09940a54
commit cac7a22b92
4 changed files with 3 additions and 4 deletions

View File

@ -242,7 +242,7 @@ Tensor add(Tensor qa, Tensor qb, double output_scale, int64_t output_zero_point)
run(plan_desc);
execution_plan_cache[key] = plan_desc;
return quantized_output.view(orig_sizes);
} catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
} catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
}
TORCH_CHECK(false, "Unable to find an engine to execute this computation in Quantized Add Cudnn");

View File

@ -252,7 +252,7 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
run(plan);
execution_plan_cache.emplace(key, plan);
return;
} catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
} catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
}
TORCH_CHECK(false, "Unable to find an engine to execute this computation in Quantized Conv2D Cudnn");

View File

@ -286,7 +286,7 @@ void PackedLinearWeightCudnn::apply_impl_helper(const at::Tensor& quantized_outp
run(plan);
execution_plan_cache.emplace(key, plan);
return;
} catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
} catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
}
TORCH_CHECK(false, "Unable to find an engine to execute this computation Quantized Linear Cudnn");

View File

@ -4052,7 +4052,6 @@ class TestQuantizedLinear(TestCase):
use_channelwise=st.sampled_from([False])) # channelwise currently not supported for qlinear cudnn
@skipIfNoFBGEMM
@unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
@unittest.skipIf(TEST_CUDNN and torch.backends.cudnn.version() == 90100, "expected failure on cuDNN 9.1.0")
@unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
@unittest.skipIf(TEST_ROCM, "not supported on rocm.")
# TODO: check with yang regarding CUDNN flags