Refactor local lint (#58798)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/58798 In #58623 there was a bug in `make quicklint` where ShellCheck would run on the entire repo when there were no files. This PR fixes that by refactoring out the common logic (such as skipping quicklint when there are no files and letting checks do their own file filtering) and pushing that logic into a runner class. Test Plan: Imported from OSS Reviewed By: samestep Differential Revision: D28649889 Pulled By: driazati fbshipit-source-id: b19f32cdb63396c806cb689b2f6daf97e1724d44
2025-12-06 12:20:52 +01:00 · 2021-05-24 13:49:47 -07:00 · 2021-05-24 13:49:47 -07:00 · a679bb5ecf
commit a679bb5ecf
parent a7f4f80903
3 changed files with 218 additions and 211 deletions
--- a/3
+++ b/3
@ -46,7 +46,6 @@ shellcheck:
 		--job 'shellcheck' \
 		--step 'Extract scripts from GitHub Actions workflows'
 	@$(PYTHON) tools/actions_local_runner.py \
 		--file-filter '.sh' \
 		$(CHANGED_ONLY) \
 		--job 'shellcheck'
@ -90,13 +89,11 @@ quick_checks:
 flake8:
 	@$(PYTHON) tools/actions_local_runner.py \
 		--file-filter '.py' \
 		$(CHANGED_ONLY) \
 		--job 'flake8-py3'
 mypy:
 	@$(PYTHON) tools/actions_local_runner.py \
 		--file-filter '.py' \
 		$(CHANGED_ONLY) \
 		--job 'mypy'
--- a/tools/actions_local_runner.py
+++ b/tools/actions_local_runner.py
@ -13,7 +13,7 @@ import fnmatch
 import shlex
 import configparser
-from typing import List, Dict, Any, Optional, Tuple, Union
+from typing import List, Dict, Any, Optional, Union, NamedTuple, Set
 REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@ -87,20 +87,28 @@ def find_changed_files() -> List[str]:
 def print_results(job_name: str, passed: bool, streams: List[str]) -> None:
-    header(job_name, passed)
+    icon = color(col.GREEN, "✓") if passed else color(col.RED, "x")
    print(f"{icon} {color(col.BLUE, job_name)}")
    for stream in streams:
        stream = stream.strip()
        if stream != "":
            print(stream)
 # Result record returned by shell_cmd() and by the Check quick()/full() hooks.
 class CommandResult(NamedTuple):
    # True when the command's return code was 0.
    passed: bool
    # Decoded, stripped stdout; "" when shell_cmd() was called with redirect=False.
    stdout: str
    # Decoded, stripped stderr; "" when shell_cmd() was called with redirect=False.
    stderr: str
 async def shell_cmd(
    cmd: Union[str, List[str]],
    env: Optional[Dict[str, Any]] = None,
    redirect: bool = True,
-) -> Tuple[bool, str, str]:
+) -> CommandResult:
    if isinstance(cmd, list):
-        cmd_str = ' '.join(shlex.quote(arg) for arg in cmd)
+        cmd_str = " ".join(shlex.quote(arg) for arg in cmd)
    else:
        cmd_str = cmd
@ -117,82 +125,113 @@ async def shell_cmd(
    passed = proc.returncode == 0
    if not redirect:
-        return passed, "", ""
+        return CommandResult(passed, "", "")
-    return passed, stdout.decode().strip(), stderr.decode().strip()
+    return CommandResult(passed, stdout.decode().strip(), stderr.decode().strip())
-def header(name: str, passed: bool) -> None:
+class Check:
-    PASS = color(col.GREEN, "✓")
+    name: str
-    FAIL = color(col.RED, "x")
+
-    icon = PASS if passed else FAIL
+    def __init__(self, files: Optional[List[str]], quiet: bool):
-    print(f"{icon} {color(col.BLUE, name)}")
+        self.quiet = quiet
        self.files = files
    async def run(self) -> bool:
        """Execute the check, print its outcome, and report whether it passed.

        A ``None`` result from ``run_helper`` counts as a pass and nothing is
        printed here. Otherwise one result line is printed under ``self.name``;
        stderr followed by stdout is included only when the check failed.
        """
        outcome = await self.run_helper()
        if outcome is None:
            return True

        ok = outcome.passed
        # Captured output is only worth showing when something went wrong.
        output = [] if ok else [outcome.stderr, outcome.stdout]
        print_results(self.name, ok, output)
        return ok
    async def run_helper(self) -> Optional[CommandResult]:
        """Dispatch to the full check or to the quick check on filtered files.

        With no file list (``self.files is None``) the full check runs. With a
        file list, it is first narrowed by ``filter_files``; if nothing is
        left, a passing result is returned without running any command.
        """
        if self.files is None:
            return await self.full()

        candidates = self.filter_files(self.files)
        if candidates:
            return await self.quick(candidates)

        # Nothing left to lint, so report success without running anything.
        return CommandResult(passed=True, stdout="", stderr="")
    def filter_ext(self, files: List[str], extensions: Set[str]) -> List[str]:
        """Return the entries of ``files`` whose extension is in ``extensions``.

        The extension is whatever ``os.path.splitext`` reports, so it includes
        the leading dot (e.g. ``".py"``). Input order is preserved.
        """
        kept = []
        for path in files:
            _, suffix = os.path.splitext(path)
            if suffix in extensions:
                kept.append(path)
        return kept
    def filter_files(self, files: List[str]) -> List[str]:
        """Select the files this check applies to.

        The base implementation keeps every file and returns ``files`` itself.
        """
        return files
    async def quick(self, files: List[str]) -> CommandResult:
        """Run the check on ``files`` only.

        ``run_helper`` calls this with the non-empty output of
        ``filter_files``. The base class provides no implementation.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
    async def full(self) -> Optional[CommandResult]:
        """Run the check without a file list.

        ``run_helper`` calls this when ``self.files`` is ``None``. The base
        class provides no implementation.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
-def get_flake_excludes() -> List[str]:
+class Flake8(Check):
    name = "flake8"
    def filter_files(self, files: List[str]) -> List[str]:
        config = configparser.ConfigParser()
        config.read(os.path.join(REPO_ROOT, ".flake8"))
-    excludes = re.split(r',\s*', config["flake8"]["exclude"].strip())
+        excludes = re.split(r",\s*", config["flake8"]["exclude"].strip())
        excludes = [e.strip() for e in excludes if e.strip() != ""]
    return excludes
 async def run_flake8(files: Optional[List[str]], quiet: bool) -> bool:
    cmd = ["flake8"]
    excludes = get_flake_excludes()
        def should_include(name: str) -> bool:
            for exclude in excludes:
                if fnmatch.fnmatch(name, pat=exclude):
                    return False
-            if name.startswith(exclude) or ("./" + name).startswith(exclude):
+                if name.startswith(exclude) or f"./{name}".startswith(exclude):
                    return False
            return True
-    if files is not None:
+        files = self.filter_ext(files, {".py"})
-        files = [f for f in files if should_include(f)]
+        return [f for f in files if should_include(f)]
-        if len(files) == 0:
+    async def quick(self, files: List[str]) -> CommandResult:
-            print_results("flake8", True, [])
+        return await shell_cmd(["flake8"] + files)
            return True
-        # Running quicklint, pass in an explicit list of files (unlike mypy,
+    async def full(self) -> CommandResult:
-        # flake8 will still use .flake8 to filter this list by the 'exclude's
+        return await shell_cmd(["flake8"])
        # in the config
        cmd += files
    passed, stdout, stderr = await shell_cmd(cmd)
    print_results("flake8", passed, [stdout, stderr])
    return passed
-async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
+class Mypy(Check):
    name = "mypy (skipped typestub generation)"
    def filter_files(self, files: List[str]) -> List[str]:
        """Keep only the ``.py`` and ``.pyi`` entries of ``files``."""
        return self.filter_ext(files, {".py", ".pyi"})
    def env(self) -> Dict[str, Any]:
        """Build the environment mapping passed to the mypy subprocesses.

        Starts from a copy of the current process environment and, when
        ``should_color()`` is true, sets ``MYPY_FORCE_COLOR`` to ``"1"``.
        """
        environment = dict(os.environ)
        if should_color():
            # Secret env variable: https://github.com/python/mypy/issues/7771
            environment["MYPY_FORCE_COLOR"] = "1"
        return environment
-    if files is not None:
+    async def quick(self, files: List[str]) -> CommandResult:
-        # Running quick lint, use mypy-wrapper instead so it checks that the files
+        return await shell_cmd(
-        # actually should be linted
+            [sys.executable, "tools/mypy_wrapper.py"]
-
+            + [os.path.join(REPO_ROOT, f) for f in files],
-        passed, stdout, stderr = await shell_cmd(
+            env=self.env(),
            [sys.executable, "tools/mypy_wrapper.py"] + [
                os.path.join(REPO_ROOT, f) for f in files
            ],
            env=env,
        )
-        print_results("mypy (skipped typestub generation)", passed, [
+    async def full(self) -> None:
-            stdout + "\n",
+        env = self.env()
-            stderr + "\n",
+        # hackily change the name
-        ])
+        self.name = "mypy"
        return passed
-    # Not running quicklint, so use lint.yml
+        await shell_cmd(
    _, _, _ = await shell_cmd(
            [
                sys.executable,
                "tools/actions_local_runner.py",
@ -206,7 +245,8 @@ async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
            redirect=False,
            env=env,
        )
-    passed, _, _ = await shell_cmd(
+
        await shell_cmd(
            [
                sys.executable,
                "tools/actions_local_runner.py",
@ -220,27 +260,21 @@ async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
            redirect=False,
            env=env,
        )
    return passed
 class ShellCheck(Check):
    name = "shellcheck: Run ShellCheck"
-async def run_shellcheck(files: Optional[List[str]], quiet: bool) -> bool:
+    def filter_files(self, files: List[str]) -> List[str]:
-    if files is not None:
+        return self.filter_ext(files, {".sh"})
-        # The files list should already be filtered by '--file-filter ".sh"' when
+
-        # calling this script
+    async def quick(self, files: List[str]) -> CommandResult:
-        passed, stdout, stderr = await shell_cmd(
+        return await shell_cmd(
-            ["tools/run_shellcheck.sh"] + [
+            ["tools/run_shellcheck.sh"] + [os.path.join(REPO_ROOT, f) for f in files],
                os.path.join(REPO_ROOT, f) for f in files
            ],
        )
        print_results("shellcheck: Run ShellCheck", passed, [
            stdout + "\n",
            stderr + "\n",
        ])
        return passed
-    # Not running quicklint, so use lint.yml
+    async def full(self) -> None:
-    passed, _, _ = await shell_cmd(
+        await shell_cmd(
            [
                sys.executable,
                "tools/actions_local_runner.py",
@ -253,40 +287,29 @@ async def run_shellcheck(files: Optional[List[str]], quiet: bool) -> bool:
            ],
            redirect=False,
        )
    return passed
-async def run_step(
+class YamlStep(Check):
-    step: Dict[str, Any], job_name: str, files: Optional[List[str]], quiet: bool
+    def __init__(self, step: Dict[str, Any], job_name: str, quiet: bool):
-) -> bool:
+        super().__init__(files=None, quiet=quiet)
        self.step = step
        self.name = f'{job_name}: {self.step["name"]}'
    async def full(self) -> CommandResult:
        env = os.environ.copy()
        env["GITHUB_WORKSPACE"] = "/tmp"
-    script = step["run"]
+        script = self.step["run"]
-    if quiet:
+        if self.quiet:
            # TODO: Either lint that GHA scripts only use 'set -eux' or make this more
            # resilient
            script = script.replace("set -eux", "set -eu")
            script = re.sub(r"^time ", "", script, flags=re.MULTILINE)
    name = f'{job_name}: {step["name"]}'
-    passed, stderr, stdout = await shell_cmd(script, env=env)
+        return await shell_cmd(script, env=env)
    if not passed:
        print_results(name, passed, [stdout, stderr])
    else:
        print_results(name, passed, [])
    return passed
-async def run_steps(
+def changed_files() -> Optional[List[str]]:
    steps: List[Dict[str, Any]], job_name: str, files: Optional[List[str]], quiet: bool
 ) -> bool:
    coros = [run_step(step, job_name, files, quiet) for step in steps]
    return all(await asyncio.gather(*coros))
 def relevant_changed_files(file_filters: Optional[List[str]]) -> Optional[List[str]]:
    changed_files: Optional[List[str]] = None
    try:
        changed_files = sorted(find_changed_files())
@ -298,16 +321,7 @@ def relevant_changed_files(file_filters: Optional[List[str]]) -> Optional[List[s
        )
        return None
    if file_filters is None:
    return changed_files
    else:
        relevant_files = []
        for f in changed_files:
            for file_filter in file_filters:
                if f.endswith(file_filter):
                    relevant_files.append(f)
                    break
        return relevant_files
 def grab_specific_steps(
@ -331,11 +345,6 @@ def main() -> None:
        description="Pull shell scripts out of GitHub actions and run them"
    )
    parser.add_argument("--file", help="YAML file with actions")
    parser.add_argument(
        "--file-filter",
        help="only pass through files with this extension",
        nargs="*",
    )
    parser.add_argument(
        "--changed-only",
        help="only run on changed files",
@ -349,12 +358,8 @@ def main() -> None:
    parser.add_argument("--step", action="append", help="steps to run (in order)")
    args = parser.parse_args()
    relevant_files = None
    quiet = not args.no_quiet
    if args.changed_only:
        relevant_files = relevant_changed_files(args.file_filter)
    if args.file is None:
        # If there is no .yml file provided, fall back to the list of known
        # jobs. We use this for flake8 and mypy since they run different
@ -363,7 +368,12 @@ def main() -> None:
            raise RuntimeError(
                f"Job {args.job} not found and no .yml file was provided"
            )
-        future = ad_hoc_steps[args.job](relevant_files, quiet)
+
        files = None
        if args.changed_only:
            files = changed_files()
        checks = [ad_hoc_steps[args.job](files, quiet)]
    else:
        if args.step is None:
            raise RuntimeError("1+ --steps must be provided")
@ -380,18 +390,21 @@ def main() -> None:
        # Pull the relevant sections out of the provided .yml file and run them
        relevant_steps = grab_specific_steps(args.step, job)
-        future = run_steps(relevant_steps, args.job, relevant_files, quiet)
+        checks = [
            YamlStep(step=step, job_name=args.job, quiet=quiet)
            for step in relevant_steps
        ]
    loop = asyncio.get_event_loop()
-    loop.run_until_complete(future)
+    loop.run_until_complete(asyncio.gather(*[check.run() for check in checks]))
 # These are run differently locally in order to enable quicklint, so dispatch
 # out to special handlers instead of using lint.yml
 ad_hoc_steps = {
-    "mypy": run_mypy,
+    "mypy": Mypy,
-    "flake8-py3": run_flake8,
+    "flake8-py3": Flake8,
-    "shellcheck": run_shellcheck,
+    "shellcheck": ShellCheck,
 }
 if __name__ == "__main__":
--- a/tools/test/test_actions_local_runner.py
+++ b/tools/test/test_actions_local_runner.py
@ -24,38 +24,23 @@ if sys.version_info >= (3, 8):
        def test_step_extraction(self) -> None:
            fake_job = {
                "steps": [
-                    {
+                    {"name": "test1", "run": "echo hi"},
-                        "name": "test1",
+                    {"name": "test2", "run": "echo hi"},
-                        "run": "echo hi"
+                    {"name": "test3", "run": "echo hi"},
                    },
                    {
                        "name": "test2",
                        "run": "echo hi"
                    },
                    {
                        "name": "test3",
                        "run": "echo hi"
                    },
                ]
            }
            actual = actions_local_runner.grab_specific_steps(["test2"], fake_job)
            expected = [
-                {
+                {"name": "test2", "run": "echo hi"},
                    "name": "test2",
                    "run": "echo hi"
                },
            ]
            self.assertEqual(actual, expected)
        async def test_runner(self) -> None:
-            fake_step = {
+            fake_step = {"name": "say hello", "run": "echo hi"}
                "name": "say hello",
                "run": "echo hi"
            }
            f = io.StringIO()
            with contextlib.redirect_stdout(f):
-                await actions_local_runner.run_steps([fake_step], "test", None, True)
+                await actions_local_runner.YamlStep(fake_step, "test", True).run()
            result = f.getvalue()
            self.assertIn("say hello", result)
@ -80,7 +65,9 @@ if sys.version_info >= (3, 8):
        def test_lint(self):
            cmd = ["make", "lint", "-j", str(multiprocessing.cpu_count())]
-            proc = subprocess.run(cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE)
+            proc = subprocess.run(
                cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE
            )
            stdout = proc.stdout.decode()
            for line in self.expected:
@ -90,7 +77,9 @@ if sys.version_info >= (3, 8):
        def test_quicklint(self):
            cmd = ["make", "quicklint", "-j", str(multiprocessing.cpu_count())]
-            proc = subprocess.run(cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE)
+            proc = subprocess.run(
                cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE
            )
            stdout = proc.stdout.decode()
            for line in self.expected:
@ -99,7 +88,6 @@ if sys.version_info >= (3, 8):
            # TODO: See https://github.com/pytorch/pytorch/issues/57967
            self.assertIn("mypy (skipped typestub generation)", stdout)
    class TestQuicklint(unittest.IsolatedAsyncioTestCase):
        test_files = [
            os.path.join("caffe2", "some_cool_file.py"),
@ -108,17 +96,21 @@ if sys.version_info >= (3, 8):
            os.path.join("torch", "some_stubs.pyi"),
            os.path.join("test.sh"),
        ]
-        test_py_files = [f for f in test_files if f.endswith(".py") or f.endswith(".pyi")]
+        test_py_files = [
            f for f in test_files if f.endswith(".py") or f.endswith(".pyi")
        ]
        test_sh_files = [f for f in test_files if f.endswith(".sh")]
        maxDiff = None
        def setUp(self, *args, **kwargs):
            for name in self.test_files:
-                bad_code = textwrap.dedent("""
+                bad_code = textwrap.dedent(
                    """
                    some_variable = '2'
                    some_variable = None
                    some_variable = 11.2
-                """).rstrip("\n")
+                """
                ).rstrip("\n")
                with open(name, "w") as f:
                    f.write(bad_code)
@ -135,20 +127,22 @@ if sys.version_info >= (3, 8):
        async def test_flake8(self):
            f = io.StringIO()
            with contextlib.redirect_stdout(f):
-                await actions_local_runner.run_flake8(self.test_py_files, True)
+                await actions_local_runner.Flake8(self.test_py_files, True).run()
            # Should exclude the caffe2/ file
-            expected = textwrap.dedent("""
+            expected = textwrap.dedent(
                """
                x flake8
                torch/some_cool_file.py:4:21: W292 no newline at end of file
                aten/some_cool_file.py:4:21: W292 no newline at end of file
-            """).lstrip("\n")
+            """
            ).lstrip("\n")
            self.assertEqual(expected, f.getvalue())
        async def test_shellcheck(self):
            f = io.StringIO()
            with contextlib.redirect_stdout(f):
-                await actions_local_runner.run_shellcheck(self.test_sh_files, True)
+                await actions_local_runner.ShellCheck(self.test_sh_files, True).run()
            self.assertIn("SC2148: Tips depend on target shell", f.getvalue())
            self.assertIn("SC2283: Remove spaces around = to assign", f.getvalue())
@ -173,12 +167,12 @@ if sys.version_info >= (3, 8):
                    redirect=True,
                )
-                await actions_local_runner.run_mypy(self.test_py_files, True)
+                await actions_local_runner.Mypy(self.test_py_files, True).run()
            # Should exclude the aten/ file; also, apparently mypy
            # typechecks files in reverse order
-            expected = textwrap.dedent("""
+            expected = textwrap.dedent(
                """
                x mypy (skipped typestub generation)
                torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
@ -186,9 +180,12 @@ if sys.version_info >= (3, 8):
                torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
-            """).lstrip("\n")  # noqa: B950
+            """  # noqa: B950
            ).lstrip(
                "\n"
            )
            self.assertEqual(expected, f.getvalue())
-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()