diff --git a/test/slow_tests.json b/test/slow_tests.json index 65d9f1c5b7f..457701b46b6 100644 --- a/test/slow_tests.json +++ b/test/slow_tests.json @@ -1,259 +1,260 @@ { - "EndToEndLSTM (__main__.RNNTest)": 204.19766235351562, - "MultiheadAttention (__main__.ModulesTest)": 144.12199910481772, - "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 89.76433372497559, - "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 60.78266716003418, - "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 71.0364990234375, - "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.0049991607666, - "test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 93.56200218200684, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 122.12249755859375, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 202.69849395751953, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 178.81350326538086, - "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.96700286865234, - "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 153.12700271606445, - "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 1025.2469787597656, - "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 103.65142822265625, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 489.04433186848956, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 487.5743357340495, - "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 123.70524787902832, - "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 73.78200149536133, - "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 66.94600009918213, - "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 184.28466669718424, - "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 129.23699951171875, - "test_cat_2k_args (__main__.TestTEFuserDynamic)": 105.84207906299515, - "test_cat_2k_args (__main__.TestTEFuserStatic)": 118.78279071262008, - "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 385.1773325602214, - "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 415.5623270670573, - "test_collect_callgrind (__main__.TestBenchmarkUtils)": 302.11150614420575, - "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 89.50349998474121, - "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 88.17774963378906, - "test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 63.955399703979495, - "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 62.5629997253418, - "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 86.9015007019043, - "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 91.01150131225586, - "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 65.91899871826172, - "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 404.79449462890625, - "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 64.88150024414062, - "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 390.1374969482422, - "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 65.5984992980957, - "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 242.27249908447266, - "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 288.8112487792969, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1021.2769927978516, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 65.61349868774414, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1078.281997680664, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.2790002822876, - "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.57924842834473, - "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.62350082397461, - "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 70.42674827575684, - "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.98200035095215, - "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 68.36449909210205, - "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 61.57929992675781, - "test_comprehensive_linalg_vector_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 83.75249862670898, - "test_comprehensive_linalg_vector_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 82.4640007019043, - "test_comprehensive_linalg_vector_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 97.88249969482422, - "test_comprehensive_logspace_cpu_float32 (__main__.TestInductorOpInfoCPU)": 181.32449340820312, - "test_comprehensive_logspace_cpu_float64 (__main__.TestInductorOpInfoCPU)": 171.81600189208984, - "test_comprehensive_logspace_cpu_int32 (__main__.TestInductorOpInfoCPU)": 169.01850128173828, - "test_comprehensive_logspace_cpu_int64 (__main__.TestInductorOpInfoCPU)": 162.26849365234375, - "test_comprehensive_masked_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 213.85850524902344, - "test_comprehensive_masked_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 206.47949981689453, - "test_comprehensive_masked_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 202.62000274658203, - "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 116.22050094604492, - "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 116.08074951171875, - "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 117.2509994506836, - "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 128.12999725341797, - "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 126.67150115966797, - "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 118.62150192260742, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 91.70499992370605, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 96.74850082397461, - "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 99.50400161743164, - "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 117.61600112915039, - "test_comprehensive_nn_functional_grid_sample_cuda_bfloat16 (__main__.TestDecompCUDA)": 61.68622292412652, - "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 285.966251373291, - "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 286.9002494812012, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 75.1487263766202, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 73.88652204430622, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 72.14090042114258, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 75.99790482293992, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 90.85624885559082, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 94.66775131225586, - "test_comprehensive_nn_functional_max_pool1d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 169.1240005493164, - "test_comprehensive_nn_functional_max_pool1d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 162.36299896240234, - "test_comprehensive_nn_functional_max_pool1d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 169.9939956665039, - "test_comprehensive_nn_functional_max_pool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 1291.2069702148438, - "test_comprehensive_nn_functional_max_pool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 1166.8740234375, - "test_comprehensive_nn_functional_max_pool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 1116.0714721679688, - "test_comprehensive_nn_functional_max_pool2d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 994.1804809570312, - "test_comprehensive_nn_functional_max_pool2d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 982.9049987792969, - "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1249.2317504882812, - "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1255.1132507324219, - "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1255.1112365722656, - "test_comprehensive_nn_functional_max_pool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 929.2744750976562, - "test_comprehensive_nn_functional_max_pool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 740.7665100097656, - "test_comprehensive_nn_functional_max_pool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 753.1840209960938, - "test_comprehensive_nn_functional_max_pool3d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 709.1789855957031, - "test_comprehensive_nn_functional_max_pool3d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 723.7825012207031, - "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 505.14124298095703, - "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 504.5137481689453, - "test_comprehensive_nn_functional_max_unpool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 124.45050048828125, - "test_comprehensive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 129.04349899291992, - "test_comprehensive_nn_functional_max_unpool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 124.9415054321289, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 61.10431827198375, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 60.84139135609502, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 61.411044245180875, - "test_comprehensive_nn_functional_max_unpool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 74.14450073242188, - "test_comprehensive_nn_functional_unfold_cpu_bool (__main__.TestInductorOpInfoCPU)": 70.26900100708008, - "test_comprehensive_nn_functional_unfold_cpu_float16 (__main__.TestInductorOpInfoCPU)": 113.28900146484375, - "test_comprehensive_nn_functional_unfold_cpu_float32 (__main__.TestInductorOpInfoCPU)": 113.8120002746582, - "test_comprehensive_nn_functional_unfold_cpu_float64 (__main__.TestInductorOpInfoCPU)": 118.50249862670898, - "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 110.55724716186523, - "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 109.60800170898438, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 62.25924873352051, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 69.93900108337402, - "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 64.4350004196167, - "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 67.63899898529053, - "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 64.41949939727783, - "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 88.83725166320801, - "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 94.19975090026855, - "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 70.39225006103516, - "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 110.43149948120117, - "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 267.1796696980794, - "test_conv2d_unary_cpu_cpp_wrapper (__main__.TestCppWrapper)": 83.31399917602539, - "test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 73.52913335164388, - "test_conv_bn_fuse_cpu (__main__.CpuTests)": 87.52300071716309, - "test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 70.49650128682454, - "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 91.59375190734863, - "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 91.47550010681152, - "test_count_nonzero_all (__main__.TestBool)": 671.6698404947916, - "test_create_rand_mask_from_inputs_dynamic_shapes (__main__.DynamicShapesReproTests)": 106.40256757321565, - "test_cusparse_multiple_threads_same_device (__main__.TestCuda)": 61.286570753370015, - "test_custom_module_lstm (__main__.TestQuantizedOps)": 322.2589975992839, - "test_diff_hyperparams_sharding_strategy_str_full_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 63.88200124104818, - "test_diff_hyperparams_sharding_strategy_str_no_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 62.30999883015951, - "test_diff_hyperparams_sharding_strategy_str_shard_grad_op (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 60.74166615804037, - "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 94.44025039672852, - "test_fail_arithmetic_ops.py (__main__.TestTyping)": 62.17200152079264, - "test_fail_random.py (__main__.TestTyping)": 69.17674193843719, - "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 89.89649963378906, - "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 89.71349906921387, - "test_fn_gradgrad_map_triple_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 211.4114990234375, - "test_fn_gradgrad_map_triple_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 132.2224998474121, - "test_forward_ad_svd_lowrank_cpu_float32 (__main__.TestCompositeComplianceCPU)": 97.98550033569336, - "test_fuse_large_params_cpu (__main__.CpuTests)": 85.25499979654948, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 153.6696662902832, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 152.69783401489258, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 125.90250015258789, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 92.34375190734863, - "test_grad_nn_Transformer_cpu_float64 (__main__.TestModuleCPU)": 64.21676149822417, - "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 123.23225212097168, - "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 128.2329978942871, - "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 125.50249862670898, - "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 269.924503326416, - "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 125.14425086975098, - "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 176.18125534057617, - "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 571.6790008544922, - "test_indirect_device_assert (__main__.TritonCodeGenTests)": 271.3869934082031, - "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 70.60233497619629, - "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 94.59475135803223, - "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 156.87916564941406, - "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 116.82549667358398, - "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 122.43350219726562, - "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 97.80849838256836, - "test_linear (__main__.TestStaticQuantizedModule)": 91.20350011189778, - "test_lobpcg_basic_cuda_float64 (__main__.TestLinalgCUDA)": 80.49750232696533, - "test_low_memory_max_pool_dilation_1_dim_3_cpu_halide (__main__.HalideCpuTests)": 585.5243326822916, - "test_low_memory_max_pool_dilation_2_dim_3_cpu_halide (__main__.HalideCpuTests)": 516.3270060221354, - "test_lstm_cpu (__main__.TestMkldnnCPU)": 62.10499954223633, - "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 132.2573331197103, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 60.5918337504069, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 64.55583381652832, - "test_memory_format_operators_cuda (__main__.TestTorchDeviceTypeCUDA)": 76.3048454110439, - "test_out_variant_custom_op_dynamic_shapes (__main__.DynamicShapesMiscTests)": 79.89417275138523, - "test_proper_exit (__main__.TestDataLoader)": 246.03874969482422, - "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 238.50450134277344, - "test_python_ref_executor__refs_special_zeta_executor_aten_cuda_float64 (__main__.TestCommonCUDA)": 60.665499210357666, - "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 144.47583134969076, - "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 64.56902594315379, - "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn2d)": 62.75823718623111, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 78.22150039672852, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 91.45999908447266, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 104.36700057983398, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 75.94200134277344, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 90.85449981689453, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 97.81800079345703, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 73.60699844360352, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 86.91350173950195, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 86.44449996948242, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 75.63949966430664, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 100.4010009765625, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 91.33300018310547, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 75.61149978637695, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 87.62849807739258, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 92.85200119018555, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 80.0374984741211, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 91.739501953125, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 89.72999954223633, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 73.18899917602539, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 86.14900207519531, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 86.40299987792969, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 91.67950057983398, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 97.89799880981445, - "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 426.1199951171875, - "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 788.5797576904297, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 645.885986328125, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1150.8487548828125, - "test_quick_core_backward_expand_copy_cuda_float64 (__main__.TestDecompCUDA)": 63.335999488830566, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 83.89899826049805, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 219.58025360107422, - "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 94.60549926757812, - "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 175.31949615478516, - "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 80.4379997253418, - "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 117.42150115966797, - "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 85.20699691772461, - "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 140.9692497253418, - "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 96.70874977111816, - "test_register_spills_cuda (__main__.BenchmarkFusionCudaTest)": 98.57174968719482, - "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 67.52949905395508, - "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 67.56499767303467, - "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 74.22599983215332, - "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 155.21199798583984, - "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 107.1515007019043, - "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 147.1798324584961, - "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 113.45633443196614, - "test_sum_all_cpu_float64 (__main__.TestReductionsCPU)": 247.22645892538694, - "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 157.11124801635742, - "test_terminate_handler_on_crash (__main__.TestTorch)": 112.97849977016449, - "test_terminate_signal (__main__.ForkTest)": 138.1518301591277, - "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 138.05183368052045, - "test_terminate_signal (__main__.SpawnTest)": 141.89416662851968, - "test_train_parity_multi_group (__main__.TestFullyShard1DTrainingCore)": 174.1542510986328, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 90.13175010681152, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 76.49149990081787, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 94.42874908447266, - "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 121.6265001296997, - "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 125.12349891662598, - "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 107.74774932861328, - "test_unary_ops (__main__.TestTEFuserDynamic)": 165.78299776713052, - "test_unary_ops (__main__.TestTEFuserStatic)": 147.64583269755045, - "test_unwaited (__main__.CommTest)": 60.16999944051107, - "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 83.02149963378906, - "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 78.58975028991699, - "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 61.85185841151646, - "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 90.26150131225586, - "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestOperatorsCUDA)": 147.89199574788412, - "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 61.03449821472168, - "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 70.4332504272461, - "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 75.01950168609619, - "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 75.30324840545654, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 71.50250053405762, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 75.63800144195557, - "test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 62.03466642470587, - "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 168.41749572753906, - "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 68.871750831604, - "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 77.00125026702881, - "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 94.14950180053711, - "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 90.05274963378906, - "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 110.32675170898438, - "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 92.06575202941895, - "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 158.6577491760254 + "EndToEndLSTM (__main__.RNNTest)": 200.1896718343099, + "MultiheadAttention (__main__.ModulesTest)": 141.92533365885416, + "test_AllenaiLongformerBase_repro_cpu_halide (__main__.HalideCpuTests)": 210.3270060221354, + "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 105.85777706570096, + "test_adaptive_max_pool2d1_cpu_halide (__main__.HalideCpuTests)": 115.53966522216797, + "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 62.45811038547092, + "test_alexnet_prefix_cpu_halide (__main__.HalideCpuTests)": 177.51766967773438, + "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 74.74966557820638, + "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 68.23533376057942, + "test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 61.625999450683594, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 134.07366434733072, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 188.88899739583334, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 111.63599904378255, + "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.27233378092448, + "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 105.4979985555013, + "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 633.0828002929687, + "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 91.86733309427898, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 481.1977776421441, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 491.7155592176649, + "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 124.39833196004231, + "test_avg_pool3d_backward_cpu_halide (__main__.HalideCpuTests)": 62.104000091552734, + "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 81.22966766357422, + "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 69.64550145467122, + "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 175.67355600992838, + "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 125.82333374023438, + "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 369.5883280436198, + "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 418.0381130642361, + "test_collect_callgrind (__main__.TestBenchmarkUtils)": 312.76700168185766, + "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 84.68433380126953, + "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 86.41216786702473, + "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 60.670833587646484, + "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 84.44266510009766, + "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 86.69533284505208, + "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 63.40933354695638, + "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 375.11133829752606, + "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 64.89966583251953, + "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 386.1840108235677, + "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 66.45699818929036, + "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 227.58533223470053, + "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 236.75483194986978, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1000.12451171875, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 63.72516632080078, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 936.3953450520834, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 65.74933242797852, + "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 70.87016677856445, + "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 68.49433453877766, + "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 74.39149983723958, + "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 71.41349919637044, + "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 61.10983467102051, + "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 64.13150151570638, + "test_comprehensive_linalg_vector_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 89.73133341471355, + "test_comprehensive_linalg_vector_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 86.45633188883464, + "test_comprehensive_linalg_vector_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 88.76399993896484, + "test_comprehensive_linalg_vector_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 71.25218469125254, + "test_comprehensive_linalg_vector_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 71.11777793036566, + "test_comprehensive_logspace_cpu_float32 (__main__.TestInductorOpInfoCPU)": 176.61566670735678, + "test_comprehensive_logspace_cpu_float64 (__main__.TestInductorOpInfoCPU)": 173.7596689860026, + "test_comprehensive_logspace_cpu_int32 (__main__.TestInductorOpInfoCPU)": 163.57832845052084, + "test_comprehensive_logspace_cpu_int64 (__main__.TestInductorOpInfoCPU)": 161.29700215657553, + "test_comprehensive_masked_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 208.6990000406901, + "test_comprehensive_masked_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 198.11366271972656, + "test_comprehensive_masked_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 198.788330078125, + "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 121.93983332316081, + "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 119.3211669921875, + "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 113.11850102742513, + "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 121.52633412679036, + "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 114.41900126139323, + "test_comprehensive_nn_functional_fractional_max_pool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 120.74099985758464, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 92.1571667989095, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 93.97516759236653, + "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 93.90033213297527, + "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 102.24433135986328, + "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 237.9564997355143, + "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 263.09083048502606, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 70.44449869791667, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 78.58383433024089, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 66.97166633605957, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 81.04183451334636, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 89.63233439127605, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 94.67216491699219, + "test_comprehensive_nn_functional_max_pool1d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 168.28499857584634, + "test_comprehensive_nn_functional_max_pool1d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 171.91666666666666, + "test_comprehensive_nn_functional_max_pool1d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 166.12066650390625, + "test_comprehensive_nn_functional_max_pool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 1279.8836669921875, + "test_comprehensive_nn_functional_max_pool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 1132.968994140625, + "test_comprehensive_nn_functional_max_pool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 1118.725341796875, + "test_comprehensive_nn_functional_max_pool2d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 973.7703247070312, + "test_comprehensive_nn_functional_max_pool2d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 972.6750081380209, + "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1209.7756754557292, + "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1256.0619710286458, + "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1281.5216471354167, + "test_comprehensive_nn_functional_max_pool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 917.3249918619791, + "test_comprehensive_nn_functional_max_pool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 733.1909790039062, + "test_comprehensive_nn_functional_max_pool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 724.7653401692709, + "test_comprehensive_nn_functional_max_pool3d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 726.2100219726562, + "test_comprehensive_nn_functional_max_pool3d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 705.0809936523438, + "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 517.8646697998047, + "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 521.0065002441406, + "test_comprehensive_nn_functional_max_unpool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 130.64300028483072, + "test_comprehensive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 124.43033345540364, + "test_comprehensive_nn_functional_max_unpool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 128.03166707356772, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 64.71049880981445, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 64.55933380126953, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 65.66183217366536, + "test_comprehensive_nn_functional_max_unpool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 69.40700022379558, + "test_comprehensive_nn_functional_unfold_cpu_bool (__main__.TestInductorOpInfoCPU)": 74.34766642252605, + "test_comprehensive_nn_functional_unfold_cpu_float16 (__main__.TestInductorOpInfoCPU)": 112.48366800944011, + "test_comprehensive_nn_functional_unfold_cpu_float32 (__main__.TestInductorOpInfoCPU)": 116.27966562906902, + "test_comprehensive_nn_functional_unfold_cpu_float64 (__main__.TestInductorOpInfoCPU)": 117.50433603922527, + "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 106.86666615804036, + "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 94.00083287556966, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 62.15316645304362, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 69.82649993896484, + "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 61.87600072224935, + "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 69.6066665649414, + "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 68.90516599019368, + "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 102.65083312988281, + "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 85.81283442179362, + "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 70.68100102742513, + "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 98.76588948567708, + "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 229.82177903917102, + "test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 81.8357684795673, + "test_conv2d_unary_cpu_cpp_wrapper (__main__.TestCppWrapper)": 135.92233530680338, + "test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 141.42266845703125, + "test_conv3d_binary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 74.59500092726488, + "test_conv3d_unary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 64.01784662099985, + "test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 73.09766684638129, + "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 95.88766733805339, + "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 94.47416687011719, + "test_count_nonzero_all (__main__.TestBool)": 641.161878797743, + "test_custom_module_lstm (__main__.TestQuantizedOps)": 307.93677775065106, + "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 302.5940024058024, + "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 81.91116714477539, + "test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDTensorOpsCPU)": 88.2913335164388, + "test_error_detection_and_propagation (__main__.NcclErrorHandlingTest)": 67.36266835530598, + "test_fail_arithmetic_ops.py (__main__.TestTyping)": 60.49377780490451, + "test_fail_creation_ops.py (__main__.TestTyping)": 68.32106041185784, + "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 76.85566584269206, + "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 91.61366780598958, + "test_fn_gradgrad_map_triple_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 204.6830037434896, + "test_fn_gradgrad_map_triple_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 134.79716873168945, + "test_fuse_large_params_cpu (__main__.CpuTests)": 97.0917501449585, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 150.09088897705078, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 147.25677744547525, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 125.67216491699219, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 94.74416732788086, + "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 98.06850051879883, + "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 150.5540008544922, + "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 139.7729949951172, + "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 232.7606684366862, + "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 154.89383188883463, + "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 156.3326670328776, + "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 650.9168192545573, + "test_grid_sampler_2d_cpu_halide (__main__.HalideCpuTests)": 195.89266459147134, + "test_indirect_device_assert (__main__.TritonCodeGenTests)": 273.2460021972656, + "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 66.99511040581598, + "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 101.2813351949056, + "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 154.23166741265192, + "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 116.40700022379558, + "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 123.70700073242188, + "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 95.7520014444987, + "test_linear (__main__.TestStaticQuantizedModule)": 62.20888815985786, + "test_lstm_cpu (__main__.TestMkldnnCPU)": 102.4893315633138, + "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 127.22689056396484, + "test_max_pool2d2_cpu_halide (__main__.HalideCpuTests)": 431.17966715494794, + "test_max_pool2d3_cpu_halide (__main__.HalideCpuTests)": 133.41966756184897, + "test_max_pool2d5_cpu_halide (__main__.HalideCpuTests)": 360.4186706542969, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 60.48455513848199, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 63.52433310614692, + "test_proper_exit (__main__.TestDataLoader)": 234.38233439127603, + "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 242.4615020751953, + "test_python_ref_executor__refs_special_zeta_executor_aten_cuda_float64 (__main__.TestCommonCUDA)": 65.31966749827068, + "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 150.28666602240668, + "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 65.1363112979465, + "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn2d)": 63.50664397345649, + "test_qat_mobilenet_v2 (__main__.TestQuantizePT2EQATModels)": 62.56345471468839, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 73.45999908447266, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 88.02366638183594, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 85.85933430989583, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 74.7816670735677, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 88.31666564941406, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 89.21133422851562, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 73.58400217692058, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 85.65733337402344, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 94.56866709391277, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 80.31666564941406, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 95.52099863688152, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 92.52433522542317, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 75.57466634114583, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 96.05966695149739, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 88.94766743977864, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 77.00899759928386, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 95.18199920654297, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 88.22000122070312, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 69.10733286539714, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 84.89466603597005, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 85.52066548665364, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 93.1520004272461, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 91.66366831461589, + "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 370.8893330891927, + "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 733.5455017089844, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 605.9030151367188, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1136.014139811198, + "test_quick_core_backward_expand_copy_cuda_float64 (__main__.TestDecompCUDA)": 72.65350023905437, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 64.6456667582194, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 207.27167002360025, + "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 91.64166768391927, + "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 167.19299825032553, + "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 64.22866694132487, + "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 116.8476676940918, + "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 70.6433334350586, + "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 137.72866566975912, + "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 87.72266642252605, + "test_register_spills_cuda (__main__.BenchmarkFusionCudaTest)": 78.25366719563802, + "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 67.75999959309895, + "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 68.58633486429851, + "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 76.43899959988065, + "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 155.9663340250651, + "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 110.39933268229167, + "test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 85.31637557347615, + "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 136.4769990709093, + "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 113.9978896247016, + "test_sort_stable_cpu (__main__.CpuTritonTests)": 76.96166737874348, + "test_split_cumsum_cpu (__main__.CpuTritonTests)": 89.43966674804688, + "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 149.7841674486796, + "test_tensor_split (__main__.TestVmapOperators)": 76.2336671680021, + "test_terminate_handler_on_crash (__main__.TestTorch)": 111.58677675988939, + "test_terminate_signal (__main__.ForkTest)": 136.8188896137807, + "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 136.99289169742002, + "test_terminate_signal (__main__.SpawnTest)": 140.61755683687, + "test_train_parity_multi_group_unshard_async_op (__main__.TestFullyShard1DTrainingCore)": 69.51326649983724, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 68.61666615804036, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 65.95349820454915, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 66.64900016784668, + "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 122.68766657511394, + "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 120.926331837972, + "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 104.47883415222168, + "test_unary_ops (__main__.TestTEFuserDynamic)": 172.1952222188314, + "test_unary_ops (__main__.TestTEFuserStatic)": 158.92655531565347, + "test_upsample_bicubic2d_cpu_halide (__main__.HalideCpuTests)": 96.95966339111328, + "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 90.34199778238933, + "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 69.39216740926106, + "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 73.56816864013672, + "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 96.19633483886719, + "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestOperatorsCUDA)": 93.57866668701172, + "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 95.94100189208984, + "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 71.65300051371257, + "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 84.81466547648112, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 100.53633308410645, + "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 69.77733103434245, + "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 67.43849881490071, + "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 77.40583229064941, + "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 64.32900110880534, + "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 71.61133193969727, + "test_vmapvjpvjp_linalg_lstsq_cuda_float32 (__main__.TestOperatorsCUDA)": 60.90399932861328, + "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 76.39033381144206, + "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 77.00383377075195, + "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 143.61550013224283 } \ No newline at end of file