From 16b21fa8b288140e5067d63e46f670aca495b4cd Mon Sep 17 00:00:00 2001 From: Xu Han Date: Thu, 17 Jul 2025 15:43:20 +0000 Subject: [PATCH] [AOTI] skip ld and objcopy on Windows. (#158545) Skip `ld` and `objcopy` on Windows. They are not supported on Windows. Pull Request resolved: https://github.com/pytorch/pytorch/pull/158545 Approved by: https://github.com/desertfire --- torch/_inductor/codecache.py | 66 +++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py index 75e12943706..c8b23aded15 100644 --- a/torch/_inductor/codecache.py +++ b/torch/_inductor/codecache.py @@ -2171,40 +2171,44 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n""" cubins_o = [] asm_files = [] - ld, objcopy = get_ld_and_objcopy(use_relative_path) - for kernel_name, value in CudaKernelParamCache.cache.items(): - if asm_file := value["asm"]: - asm_files.append(asm_file) + if not _IS_WINDOWS: + ld, objcopy = get_ld_and_objcopy(use_relative_path) + for kernel_name, value in CudaKernelParamCache.cache.items(): + if asm_file := value["asm"]: + asm_files.append(asm_file) - cubin_file = value[get_cpp_wrapper_cubin_path_name()] - if config.aot_inductor.emit_multi_arch_kernel and device_type == "cuda": - current_arch = _nvcc_arch_as_compile_option() - cmd = ( - f"{_cuda_compiler()} -fatbin {asm_file} -o {cubin_file} " - # Triton only allows generating PTX version as same as the current arch - f"-gencode arch=compute_{current_arch},code=compute_{current_arch} " - # Include SASS for the current specific arch - f"-gencode arch=compute_{current_arch},code=sm_{current_arch} " - ) - try: - subprocess.run( - cmd.split(), - capture_output=True, - text=True, - check=True, + cubin_file = value[get_cpp_wrapper_cubin_path_name()] + if ( + config.aot_inductor.emit_multi_arch_kernel + and device_type == "cuda" + ): + current_arch = _nvcc_arch_as_compile_option() + cmd = ( + f"{_cuda_compiler()} -fatbin {asm_file} -o 
{cubin_file} " + # Triton only allows generating PTX version as same as the current arch + f"-gencode arch=compute_{current_arch},code=compute_{current_arch} " + # Include SASS for the current specific arch + f"-gencode arch=compute_{current_arch},code=sm_{current_arch} " ) - except subprocess.CalledProcessError as e: - print( - f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}", - file=sys.stderr, - ) - raise + try: + subprocess.run( + cmd.split(), + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as e: + print( + f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}", + file=sys.stderr, + ) + raise - if config.aot_inductor.embed_kernel_binary: - # Embed cubin files into model.so using objcopy - cubins_o.append( - convert_cubin_to_obj(cubin_file, kernel_name, ld, objcopy) - ) + if config.aot_inductor.embed_kernel_binary: + # Embed cubin files into model.so using objcopy + cubins_o.append( + convert_cubin_to_obj(cubin_file, kernel_name, ld, objcopy) + ) output_name, output_dir = get_name_and_dir_from_output_file_path(output_so) so_build_options = CppTorchDeviceOptions(