From 21eeedb4554edab22b42bcb2f75f19e85652b72e Mon Sep 17 00:00:00 2001
From: Xu Han <xu.han@outlook.com>
Date: Thu, 4 Jul 2024 19:08:56 +0000
Subject: [PATCH] [Inductor] Add aot_mode UT to new cpp_builder. (#130105)

Changes:
1. Add `aot_mode` parameter to `validate_new_cpp_commands` UT.
2. Switch AotCodeCompiler vec isa command gen to new cpp_builder.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130105
Approved by: https://github.com/jgong5, https://github.com/jansel
---
 torch/_inductor/codecache.py   | 70 ++++++++++++++++++++++------------
 torch/_inductor/cpp_builder.py | 51 ++++++++++++++-----------
 2 files changed, 75 insertions(+), 46 deletions(-)

diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py
index ced974b0a9c..9fb421ae5ea 100644
--- a/torch/_inductor/codecache.py
+++ b/torch/_inductor/codecache.py
@@ -76,6 +76,7 @@ from torch._inductor.cpp_builder import (
     CppOptions,
     CppTorchCudaOptions,
     get_compiler_version_info,
+    get_name_and_dir_from_output_file_path,
 )
 from torch._inductor.cpu_vec_isa import invalid_vec_isa, pick_vec_isa, VecISA
 from torch._inductor.runtime.compile_tasks import (
@@ -1595,15 +1596,22 @@ class AotCodeCompiler:
         cuda: bool,
     ) -> str:
         picked_vec_isa = pick_vec_isa()
-        cpp_command = repr(
-            cpp_compile_command(
-                "i",
-                "o",
+        vec_isa_cmd_gen = CppBuilder(
+            name="o",
+            sources="i",
+            BuildOption=CppTorchCudaOptions(
                 vec_isa=picked_vec_isa,
                 cuda=cuda,
                 aot_mode=graph.aot_mode,
-            )
+            ),
         )
+        # The write function hashes the source code, and the same source code built for a
+        # different ISA level should produce a different hash.
+        # So we need a command line that contains the ISA-related parameters as part of the hash key.
+        # We then pass that command line to the write function below as an extra parameter to
+        # guarantee that the source code hash reflects the ISA difference.
+        cpp_command = repr(vec_isa_cmd_gen.get_command_line())
+
         fbcode_aot_cpu_re = False
         use_absolute_path = False
         if config.is_fbcode():
@@ -1853,7 +1861,6 @@ class AotCodeCompiler:
                 "linux": _compile_consts_linux,
                 "darwin": _compile_consts_darwin,
             }[sys.platform](aot_constants)
-
             link_cmd = cpp_compile_command(
                 input=[output_o, consts_o],
                 output=output_so,
@@ -2051,8 +2058,6 @@ class CppCodeCache:
 
         _set_gpu_runtime_env()  # cpp_extension consults the env
 
-        from torch._inductor.cpp_builder import CppBuilder, CppTorchCudaOptions
-
         command_gen = CppBuilder(
             name="o", sources="i", BuildOption=CppTorchCudaOptions(**compile_command)
         )
@@ -2363,35 +2368,48 @@ def _do_validate_cpp_commands(
     compile_only: bool,
     mmap_weights: bool,
     use_absolute_path: bool,
+    aot_mode: bool,
 ):
     # PreCI will failed if test machine can't run cuda.
     temp_dir = tempfile.TemporaryDirectory()
     test_dir_path = temp_dir.name
     test_cuda = torch.cuda.is_available() and cuda
-    input_path = os.path.join(test_dir_path, "dummy_input.cpp")
-    output_path = os.path.join(test_dir_path, "dummy_output.so")
+    input_path = os.path.join(test_dir_path, "dummy_file.cpp")
+    output_path = os.path.join(test_dir_path, "dummy_file.so")
     extra_flags = ["-D TEST_EXTRA_FLAGS"]
     if compile_only:
-        output_path = os.path.join(test_dir_path, "dummy_output.o")
+        output_path = os.path.join(test_dir_path, "dummy_file.o")
     picked_isa = pick_vec_isa()
 
+    # Simulate fb_code env:
+    if not (aot_mode and not use_absolute_path):
+        input_path = os.path.basename(input_path)
+        output_path = os.path.basename(output_path)
+
+    # Work around the test_new_cpp_build_logical failure on macOS: only exercise aot_mode on Linux.
+    if sys.platform != "linux":
+        aot_mode = False
+
     old_cmd = cpp_compile_command(
         input=input_path,
         output=output_path,
         include_pytorch=include_pytorch,
         vec_isa=picked_isa,
         cuda=test_cuda,
-        aot_mode=False,
+        aot_mode=aot_mode,
         compile_only=compile_only,
         use_absolute_path=use_absolute_path,
         use_mmap_weights=mmap_weights,
         extra_flags=extra_flags,
     ).split(" ")
 
+    name, dir = get_name_and_dir_from_output_file_path(input_path)
+
     dummy_build_option = CppTorchCudaOptions(
         vec_isa=picked_isa,
         include_pytorch=include_pytorch,
         cuda=test_cuda,
+        aot_mode=aot_mode,
         compile_only=compile_only,
         use_absolute_path=use_absolute_path,
         use_mmap_weights=mmap_weights,
@@ -2399,10 +2417,10 @@ def _do_validate_cpp_commands(
     )
 
     dummy_builder = CppBuilder(
-        name="dummy_output",
+        name=name,
         sources=input_path,
+        output_dir=dir,
         BuildOption=dummy_build_option,
-        output_dir=test_dir_path,
     )
     new_cmd = dummy_builder.get_command_line().split(" ")
 
@@ -2419,22 +2437,26 @@ def validate_new_cpp_commands():
     compile_only = [True, False]
     include_pytorch = [True, False]
     use_absolute_path = [True, False]
+    aot_mode = [False, True]
 
     for x in cuda:
         for y in use_mmap_weights:
             for z in compile_only:
                 for m in include_pytorch:
                     for n in use_absolute_path:
-                        print(
-                            f"!!! cuda:{x}, use_mmap_weights:{y}, compile_only:{z}, include_pytorch:{m}， use_absolute_path:{n}"
-                        )
-                        _do_validate_cpp_commands(
-                            include_pytorch=m,
-                            cuda=x,
-                            mmap_weights=y,
-                            compile_only=z,
-                            use_absolute_path=n,
-                        )
+                        for o in aot_mode:
+                            print(
+                                f"!!! cuda:{x}, use_mmap_weights:{y}, compile_only:{z}, include_pytorch:{m},"
+                                f" use_absolute_path:{n}, aot_mode:{o}"
+                            )
+                            _do_validate_cpp_commands(
+                                include_pytorch=m,
+                                cuda=x,
+                                mmap_weights=y,
+                                compile_only=z,
+                                use_absolute_path=n,
+                                aot_mode=o,
+                            )
 
 
 @clear_on_fresh_inductor_cache
diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
index 88a9474f8a9..f288d2a6e8a 100644
--- a/torch/_inductor/cpp_builder.py
+++ b/torch/_inductor/cpp_builder.py
@@ -540,20 +540,6 @@ def _setup_standard_sys_libs(
     return cflags, include_dirs, passthough_args
 
 
-@functools.lru_cache
-def _cpp_prefix_path() -> str:
-    from torch._inductor.codecache import write  # TODO
-
-    path = Path(Path(__file__).parent).parent / "codegen/cpp_prefix.h"
-    with path.open() as f:
-        content = f.read()
-        _, filename = write(
-            content,
-            "h",
-        )
-    return filename
-
-
 def _get_build_args_of_chosen_isa(vec_isa: VecISA):
     macros = []
     build_flags = []
@@ -939,14 +925,17 @@ def get_cpp_torch_cuda_options(cuda: bool, aot_mode: bool = False):
     libraries_dirs: List[str] = []
     libraries: List[str] = []
     passthough_args: List[str] = []
-
+    """
     if (
         config.is_fbcode()
         and "CUDA_HOME" not in os.environ
         and "CUDA_PATH" not in os.environ
     ):
         os.environ["CUDA_HOME"] = build_paths.cuda()
+    """
+    from torch._inductor.codecache import _set_gpu_runtime_env, cpp_prefix_path
 
+    _set_gpu_runtime_env()
     from torch.utils import cpp_extension
 
     include_dirs = cpp_extension.include_paths(cuda)
@@ -971,8 +960,9 @@ def get_cpp_torch_cuda_options(cuda: bool, aot_mode: bool = False):
                     libraries += ["c10_cuda", "cuda", "torch_cuda"]
 
     if aot_mode:
-        cpp_prefix_include_dir = [f"{os.path.dirname(_cpp_prefix_path())}"]
-        include_dirs += cpp_prefix_include_dir
+        if config.is_fbcode():
+            cpp_prefix_include_dir = [f"{os.path.dirname(cpp_prefix_path())}"]
+            include_dirs += cpp_prefix_include_dir
 
         if cuda and torch.version.hip is None:
             _transform_cuda_paths(libraries_dirs)
@@ -1061,15 +1051,26 @@ class CppTorchCudaOptions(CppTorchOptions):
 
 
 def get_name_and_dir_from_output_file_path(
-    aot_mode: bool, use_absolute_path: bool, file_path: str
+    file_path: str,
 ):
+    """
+    This function helps prepare the parameters for the new cpp_builder.
+    Example:
+        input_code: /tmp/tmpof1n5g7t/5c/c5crkkcdvhdxpktrmjxbqkqyq5hmxpqsfza4pxcf3mwk42lphygc.cpp
+        name, dir = get_name_and_dir_from_output_file_path(input_code)
+    Result:
+        name = c5crkkcdvhdxpktrmjxbqkqyq5hmxpqsfza4pxcf3mwk42lphygc
+        dir = /tmp/tmpof1n5g7t/5c
+
+    Pass 'name' and 'dir' to CppBuilder as its 'name' and 'output_dir' arguments.
+    CppBuilder's get_target_file_path will then format the output path according to the OS:
+    Linux: /tmp/tmppu87g3mm/zh/czhwiz4z7ca7ep3qkxenxerfjxy42kehw6h5cjk6ven4qu4hql4i.so
+    Windows: [Windows temp path]/tmppu87g3mm/zh/czhwiz4z7ca7ep3qkxenxerfjxy42kehw6h5cjk6ven4qu4hql4i.dll
+    """
     name_and_ext = os.path.basename(file_path)
     name, ext = os.path.splitext(name_and_ext)
     dir = os.path.dirname(file_path)
 
-    if config.is_fbcode():
-        if not (aot_mode and not use_absolute_path):
-            dir = "."
     return name, dir
 
 
@@ -1118,17 +1119,23 @@ class CppBuilder:
         self._target_file = ""
 
         self._use_absolute_path: bool = False
+        self._aot_mode: bool = False
 
         self._name = name
 
         # Code start here, initial self internal veriables firstly.
         self._compiler = BuildOption.get_compiler()
         self._use_absolute_path = BuildOption.get_use_absolute_path()
+        self._aot_mode = BuildOption.get_aot_mode()
 
+        """
+        TODO: validate and remove:
         if len(output_dir) == 0:
             self._output_dir = os.path.dirname(os.path.abspath(__file__))
         else:
             self._output_dir = output_dir
+        """
+        self._output_dir = output_dir
 
         self._compile_only = BuildOption.get_compile_only()
         file_ext = (
@@ -1142,7 +1149,7 @@ class CppBuilder:
             sources = [sources]
 
         if config.is_fbcode():
-            if BuildOption.get_aot_mode() and not self._use_absolute_path:
+            if self._aot_mode and not self._use_absolute_path:
                 inp_name = sources
                 # output process @ get_name_and_dir_from_output_file_path
             else: