benchmark: higher tolerance for RobertaForQuestionAnswering (#107376)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/107376 Approved by: https://github.com/kit1980, https://github.com/XiaobingSuper, https://github.com/jansel ghstack dependencies: #107375
2025-12-06 12:20:52 +01:00 · 2023-08-17 07:09:14 -07:00 · 2023-08-17 07:09:14 -07:00 · b9befc53a6
commit b9befc53a6
parent 1ea83f04d2
3 changed files with 10 additions and 4 deletions
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_dynamic_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_dynamic_inference.csv
@ -35,7 +35,7 @@ PLBartForConditionalGeneration,pass,0
 PegasusForCausalLM,pass,0
 PegasusForConditionalGeneration,pass,0
 RobertaForCausalLM,pass,0
-RobertaForQuestionAnswering,fail_accuracy,0
+RobertaForQuestionAnswering,pass,0
 Speech2Text2ForCausalLM,pass,0
 T5ForConditionalGeneration,pass,0
 T5Small,pass,0
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv
@ -37,7 +37,7 @@ PLBartForConditionalGeneration,pass,0
 PegasusForCausalLM,pass,0
 PegasusForConditionalGeneration,pass,0
 RobertaForCausalLM,pass,0
-RobertaForQuestionAnswering,fail_accuracy,0
+RobertaForQuestionAnswering,pass,0
 Speech2Text2ForCausalLM,pass,0
 T5ForConditionalGeneration,pass,0
 T5Small,pass,0
--- a/benchmarks/dynamo/huggingface.py
+++ b/benchmarks/dynamo/huggingface.py
@ -163,12 +163,15 @@ SKIP_ACCURACY_CHECK_MODELS = {
 }


-REQUIRE_HIGHER_TOLERANCE = {
+REQUIRE_HIGHER_TOLERANCE_TRAINING = {
    "MT5ForConditionalGeneration",
    # AlbertForQuestionAnswering fails in CI GCP A100 but error does not seem
    # harmful.
    "AlbertForQuestionAnswering",
 }
+REQUIRE_HIGHER_TOLERANCE_INFERENCE = {
+    "RobertaForQuestionAnswering",
+}


 SKIP_FOR_CPU = {
@ -530,10 +533,13 @@ class HuggingfaceRunner(BenchmarkRunner):
    def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
        cosine = self.args.cosine
        if is_training:
-            if name in REQUIRE_HIGHER_TOLERANCE:
+            if name in REQUIRE_HIGHER_TOLERANCE_TRAINING:
                return 2e-2, cosine
            else:
                return 1e-2, cosine
+        else:
+            if name in REQUIRE_HIGHER_TOLERANCE_INFERENCE:
+                return 4e-3, cosine
        return 1e-3, cosine

    def compute_loss(self, pred):