diff --git a/benchmarks/dynamo/check_accuracy.py b/benchmarks/dynamo/check_accuracy.py index 4bd518790b3..c90e4da33b1 100644 --- a/benchmarks/dynamo/check_accuracy.py +++ b/benchmarks/dynamo/check_accuracy.py @@ -78,6 +78,8 @@ def check_accuracy(actual_csv, expected_csv, expected_filename): "google/gemma-3-4b-it", "openai/whisper-tiny", "Qwen/Qwen3-0.6B", + "mistralai/Mistral-7B-Instruct-v0.3", + "openai/gpt-oss-20b", } ) diff --git a/benchmarks/dynamo/check_graph_breaks.py b/benchmarks/dynamo/check_graph_breaks.py index 87ef46b6832..cef7259206e 100644 --- a/benchmarks/dynamo/check_graph_breaks.py +++ b/benchmarks/dynamo/check_graph_breaks.py @@ -61,6 +61,8 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename): "google/gemma-3-4b-it", "openai/whisper-tiny", "Qwen/Qwen3-0.6B", + "mistralai/Mistral-7B-Instruct-v0.3", + "openai/gpt-oss-20b", } ) diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv index b2595458b13..59b0b7d723e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0 Qwen/Qwen3-0.6B,pass,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass,0 + + + +openai/gpt-oss-20b,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv index 25e3c419935..cd1f40fa49d 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv @@ -187,3 +187,11 @@ openai/whisper-tiny,fail_to_run,0 Qwen/Qwen3-0.6B,fail_to_run,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,fail_to_run,0 + + + +openai/gpt-oss-20b,fail_to_run,0 diff --git 
a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv index 386f9099731..83523ea778a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass_due_to_skip,0 Qwen/Qwen3-0.6B,pass_due_to_skip,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass_due_to_skip,0 + + + +openai/gpt-oss-20b,pass_due_to_skip,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv index 386f9099731..83523ea778a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass_due_to_skip,0 Qwen/Qwen3-0.6B,pass_due_to_skip,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass_due_to_skip,0 + + + +openai/gpt-oss-20b,pass_due_to_skip,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv index 386f9099731..83523ea778a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass_due_to_skip,0 Qwen/Qwen3-0.6B,pass_due_to_skip,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass_due_to_skip,0 + + + +openai/gpt-oss-20b,pass_due_to_skip,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv index 
b2595458b13..59b0b7d723e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0 Qwen/Qwen3-0.6B,pass,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass,0 + + + +openai/gpt-oss-20b,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv index b2595458b13..59b0b7d723e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0 Qwen/Qwen3-0.6B,pass,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass,0 + + + +openai/gpt-oss-20b,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv index 7da4f6ff744..bfb8438d5bc 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0 Qwen/Qwen3-0.6B,pass,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass,0 + + + +openai/gpt-oss-20b,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv index b2595458b13..59b0b7d723e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0 Qwen/Qwen3-0.6B,pass,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass,0 + + + 
+openai/gpt-oss-20b,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv index b2595458b13..59b0b7d723e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv @@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0 Qwen/Qwen3-0.6B,pass,0 + + + +mistralai/Mistral-7B-Instruct-v0.3,pass,0 + + + +openai/gpt-oss-20b,pass,0 diff --git a/benchmarks/dynamo/huggingface.yaml b/benchmarks/dynamo/huggingface.yaml index b45f199f4d4..bd109560150 100644 --- a/benchmarks/dynamo/huggingface.yaml +++ b/benchmarks/dynamo/huggingface.yaml @@ -11,6 +11,8 @@ skip: - GPTJForQuestionAnswering # Model too big - google/gemma-3-4b-it + - openai/gpt-oss-20b + - mistralai/Mistral-7B-Instruct-v0.3 device: cpu: @@ -19,6 +21,8 @@ skip: - google/gemma-3-4b-it - openai/whisper-tiny - Qwen/Qwen3-0.6B + - mistralai/Mistral-7B-Instruct-v0.3 + - openai/gpt-oss-20b control_flow: - AllenaiLongformerBase @@ -79,6 +83,8 @@ batch_size: google/gemma-3-4b-it: 8 openai/whisper-tiny: 8 Qwen/Qwen3-0.6B: 8 + mistralai/Mistral-7B-Instruct-v0.3: 8 + openai/gpt-oss-20b: 8 tolerance: diff --git a/benchmarks/dynamo/huggingface_llm_models.py b/benchmarks/dynamo/huggingface_llm_models.py index c8b0524c4d6..2c68254ebe1 100644 --- a/benchmarks/dynamo/huggingface_llm_models.py +++ b/benchmarks/dynamo/huggingface_llm_models.py @@ -99,4 +99,6 @@ HF_LLM_MODELS: dict[str, Benchmark] = { "google/gemma-3-4b-it": TextGenerationBenchmark, "openai/whisper-tiny": WhisperBenchmark, "Qwen/Qwen3-0.6B": TextGenerationBenchmark, + "mistralai/Mistral-7B-Instruct-v0.3": TextGenerationBenchmark, + "openai/gpt-oss-20b": TextGenerationBenchmark, } diff --git a/benchmarks/dynamo/huggingface_models_list.txt b/benchmarks/dynamo/huggingface_models_list.txt index 0a6327ae1aa..46f319ab0de 100644 --- 
a/benchmarks/dynamo/huggingface_models_list.txt +++ b/benchmarks/dynamo/huggingface_models_list.txt @@ -51,3 +51,5 @@ google/gemma-2-2b,8 google/gemma-3-4b-it,8 openai/whisper-tiny,8 Qwen/Qwen3-0.6B,8 +mistralai/Mistral-7B-Instruct-v0.3,8 +openai/gpt-oss-20b,8