Add quicklint make target (#56559)

Summary:
This queries the local git repo for changed files (any changed files, not just committed ones) and sends them to mypy/flake8 instead of the default (which is the whole repo, as defined in the .flake8 and mypy.ini files). This brings a significant speedup (from 15 seconds with no cache to under 1 second in my local testing on this PR).

```bash
make quicklint -j 6
```

Note that the results won't exactly match CI, since mypy and flake8 ignore the `include` and `exclude` parts of their config when an explicit list of files is passed in.
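A minimal illustration of that difference (the file name below is hypothetical):

```bash
# Config-driven run: the exclude/include settings in .flake8 and mypy.ini apply
flake8

# Explicit file list (what quicklint does): those settings are ignored, so a
# file the config would normally skip still gets checked if it changed locally
flake8 torch/some_changed_file.py
```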
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56559

Pulled By: driazati

Reviewed By: malfet

Differential Revision: D27901577

fbshipit-source-id: 99f351cdfe5aba007948aea2b8a78f683c5d8583
Author: davidriazati@fb.com 2021-04-21 13:43:52 -07:00 (committed by Facebook GitHub Bot)
Parent: 12b2bc94d7
Commit: 5e4dfd0140
6 changed files with 181 additions and 24 deletions

View File

@@ -6,6 +6,12 @@ on:
- master
pull_request:
# note [local lint]
# LOCAL_FILES in the jobs below is intended to be filled by local (i.e. on a
# developer's machine) job in tools/actions_local_runner.py to pass only the
# changed files to whatever tool is consuming the files. This helps speed up
# local lints while keeping the linting code unified between local / CI.
jobs:
quick-checks:
runs-on: ubuntu-18.04
@@ -128,15 +134,15 @@ jobs:
uses: actions/checkout@v2
- name: Install markdown-toc
run: npm install -g markdown-toc
- name: Regenerate ToCs
- name: Regenerate ToCs and check that they didn't change
run: |
set -eux
export PATH=~/.npm-global/bin:"$PATH"
for FILE in $(git grep -Il '<!-- toc -->' -- '**.md'); do
markdown-toc --bullets='-' -i "$FILE"
done
- name: Assert that regenerating the ToCs didn't change them
run: .github/scripts/report_git_status.sh
.github/scripts/report_git_status.sh
flake8-py3:
runs-on: ubuntu-18.04
@@ -164,9 +170,13 @@ jobs:
pip install -r requirements-flake8.txt
flake8 --version
- name: Run flake8
env:
# See note [local lint]
LOCAL_FILES: ""
run: |
set -eux
flake8 | tee "${GITHUB_WORKSPACE}"/flake8-output.txt
# shellcheck disable=SC2086
flake8 $LOCAL_FILES | tee "${GITHUB_WORKSPACE}"/flake8-output.txt
- name: Translate annotations
if: github.event_name == 'pull_request'
env:
@@ -342,6 +352,10 @@ jobs:
time python -mtools.codegen.gen -s aten/src/ATen -d build/aten/src/ATen
time python -mtools.pyi.gen_pyi --native-functions-path aten/src/ATen/native/native_functions.yaml --deprecated-functions-path "tools/autograd/deprecated.yaml"
- name: Run mypy
env:
# See note [local lint]
LOCAL_FILES: ""
run: |
set -eux
for CONFIG in mypy*.ini; do mypy --config="$CONFIG"; done
# shellcheck disable=SC2086
for CONFIG in mypy*.ini; do mypy --config="$CONFIG" $LOCAL_FILES; done
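To make the "note [local lint]" mechanism concrete, here is a rough sketch of how the same step behaves in CI versus under the local runner (file names are hypothetical):

```bash
# CI: LOCAL_FILES is the empty string, so the step expands to a plain "flake8"
# and the whole repo is linted according to .flake8 / mypy*.ini
LOCAL_FILES=""
flake8 $LOCAL_FILES

# Local quicklint: tools/actions_local_runner.py fills LOCAL_FILES with the
# changed files instead, so only those files are linted
LOCAL_FILES="torch/foo.py tools/bar.py"
flake8 $LOCAL_FILES
```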

View File

@@ -364,7 +364,11 @@ command runs tests such as `TestNN.test_BCELoss` and
You can run the same linting steps that are used in CI locally via `make`:
```bash
# Lint all files
make lint -j 6 # run lint (using 6 parallel jobs)
# Lint only the files you have changed
make quicklint -j 6
```
These jobs may require extra dependencies that aren't dependencies of PyTorch

View File

@@ -55,12 +55,16 @@ quick_checks:
flake8:
@python tools/actions_local_runner.py \
--file .github/workflows/lint.yml \
--file-filter '.py' \
$(CHANGED_ONLY) \
--job 'flake8-py3' \
--step 'Run flake8'
mypy:
@python tools/actions_local_runner.py \
--file .github/workflows/lint.yml \
--file-filter '.py' \
$(CHANGED_ONLY) \
--job 'mypy' \
--step 'Run mypy'
@@ -74,4 +78,13 @@ clang_tidy:
echo "clang-tidy local lint is not yet implemented"
exit 1
toc:
@python tools/actions_local_runner.py \
--file .github/workflows/lint.yml \
--job 'toc' \
--step "Regenerate ToCs and check that they didn't change"
lint: flake8 mypy quick_checks cmakelint generate-gha-workflows
quicklint: CHANGED_ONLY=--changed-only
quicklint: mypy flake8 mypy quick_checks cmakelint generate-gha-workflows
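
For reference, since the quicklint target sets CHANGED_ONLY=--changed-only, its flake8 step expands to roughly:

```bash
python tools/actions_local_runner.py \
    --file .github/workflows/lint.yml \
    --file-filter '.py' \
    --changed-only \
    --job 'flake8-py3' \
    --step 'Run flake8'
```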

View File

@@ -51,9 +51,11 @@ files =
tools/test/test_mypy_wrapper.py,
tools/test/test_test_history.py,
tools/test/test_trailing_newlines.py,
tools/test/test_actions_local_runner.py,
tools/test/test_translate_annotations.py,
tools/trailing_newlines.py,
tools/translate_annotations.py,
tools/actions_local_runner.py,
torch/testing/_internal/framework_utils.py,
torch/utils/benchmark/utils/common.py,
torch/utils/benchmark/utils/timer.py,

View File

@@ -1,10 +1,12 @@
#!/bin/python3
#!/usr/bin/env python3
import subprocess
import sys
import os
import argparse
import yaml
import asyncio
from typing import List, Dict, Any, Optional
REPO_ROOT = os.path.dirname(os.path.dirname(__file__))
@@ -21,23 +23,71 @@ class col:
UNDERLINE = "\033[4m"
def color(the_color, text):
def color(the_color: str, text: str) -> str:
return col.BOLD + the_color + str(text) + col.RESET
def cprint(the_color, text):
print(color(the_color, text))
def cprint(the_color: str, text: str) -> None:
if hasattr(sys.stdout, "isatty") and sys.stdout.isatty():
print(color(the_color, text))
else:
print(text)
async def run_step(step, job_name):
def git(args: List[str]) -> List[str]:
p = subprocess.run(
["git"] + args,
cwd=REPO_ROOT,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=True,
)
lines = p.stdout.decode().strip().split("\n")
return [line.strip() for line in lines]
def find_changed_files(merge_base: str = "origin/master") -> List[str]:
untracked = []
for line in git(["status", "--porcelain"]):
# Untracked files start with ??, so grab all of those
if line.startswith("?? "):
untracked.append(line.replace("?? ", ""))
# Modified, unstaged
modified = git(["diff", "--name-only"])
# Modified, staged
cached = git(["diff", "--cached", "--name-only"])
# Committed
diff_with_origin = git(["diff", "--name-only", "--merge-base", merge_base, "HEAD"])
# De-duplicate
all_files = set(untracked + cached + modified + diff_with_origin)
return [x.strip() for x in all_files if x.strip() != ""]
async def run_step(step: Dict[str, Any], job_name: str, files: Optional[List[str]]) -> bool:
env = os.environ.copy()
env["GITHUB_WORKSPACE"] = "/tmp"
if files is None:
env["LOCAL_FILES"] = ""
else:
env["LOCAL_FILES"] = " ".join(files)
script = step["run"]
PASS = "\U00002705"
FAIL = "\U0000274C"
# We don't need to print the commands for local running
# TODO: Either lint that GHA scripts only use 'set -eux' or make this more
# resilient
script = script.replace("set -eux", "set -eu")
name = f'{job_name}: {step["name"]}'
def header(passed: bool) -> None:
icon = PASS if passed else FAIL
cprint(col.BLUE, f"{icon} {name}")
try:
proc = await asyncio.create_subprocess_shell(
@@ -49,27 +99,31 @@ async def run_step(step, job_name):
stderr=subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
cprint(col.BLUE, f'{job_name}: {step["name"]}')
except Exception as e:
cprint(col.BLUE, f'{job_name}: {step["name"]}')
print(e)
stdout_bytes, stderr_bytes = await proc.communicate()
stdout = stdout.decode().strip()
stderr = stderr.decode().strip()
header(passed=proc.returncode == 0)
except Exception as e:
header(passed=False)
print(e)
return False
stdout = stdout_bytes.decode().strip()
stderr = stderr_bytes.decode().strip()
if stderr != "":
print(stderr)
if stdout != "":
print(stdout)
return proc.returncode == 0
async def run_steps(steps, job_name):
coros = [run_step(step, job_name) for step in steps]
async def run_steps(steps: List[Dict[str, Any]], job_name: str, files: Optional[List[str]]) -> None:
coros = [run_step(step, job_name, files) for step in steps]
await asyncio.gather(*coros)
def grab_specific_steps(steps_to_grab, job):
def grab_specific_steps(steps_to_grab: List[str], job: Dict[str, Any]) -> List[Dict[str, Any]]:
relevant_steps = []
for step in steps_to_grab:
for actual_step in job["steps"]:
@@ -83,7 +137,7 @@ def grab_specific_steps(steps_to_grab, job):
return relevant_steps
def grab_all_steps_after(last_step, job):
def grab_all_steps_after(last_step: str, job: Dict[str, Any]) -> List[Dict[str, Any]]:
relevant_steps = []
found = False
@@ -96,11 +150,13 @@ def grab_all_steps_after(last_step, job):
return relevant_steps
def main():
def main() -> None:
parser = argparse.ArgumentParser(
description="Pull shell scripts out of GitHub actions and run them"
)
parser.add_argument("--file", help="YAML file with actions", required=True)
parser.add_argument("--file-filter", help="only pass through files with this extension", default='')
parser.add_argument("--changed-only", help="only run on changed files", action='store_true', default=False)
parser.add_argument("--job", help="job name", required=True)
parser.add_argument("--step", action="append", help="steps to run (in order)")
parser.add_argument(
@@ -108,6 +164,15 @@ def main():
)
args = parser.parse_args()
changed_files = None
if args.changed_only:
changed_files = []
for f in find_changed_files():
for file_filter in args.file_filter:
if f.endswith(file_filter):
changed_files.append(f)
break
if args.step is None and args.all_steps_after is None:
raise RuntimeError("1+ --steps or --all-steps-after must be provided")
@@ -129,8 +194,7 @@ def main():
else:
relevant_steps = grab_all_steps_after(args.all_steps_after, job)
# pprint.pprint(relevant_steps)
asyncio.run(run_steps(relevant_steps, args.job))
asyncio.run(run_steps(relevant_steps, args.job, changed_files)) # type: ignore[attr-defined]
if __name__ == "__main__":
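
The git queries in find_changed_files above amount to collecting and de-duplicating the output of roughly these commands:

```bash
git status --porcelain                                # untracked files ("?? " lines)
git diff --name-only                                  # modified, unstaged
git diff --cached --name-only                         # modified, staged
git diff --name-only --merge-base origin/master HEAD  # committed since the merge base
```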

View File

@@ -0,0 +1,60 @@
import unittest
import sys
import contextlib
import io
from typing import List, Dict, Any
from tools import actions_local_runner
if __name__ == '__main__':
if sys.version_info >= (3, 8):
# actions_local_runner uses asyncio features not available in 3.6, and
# IsolatedAsyncioTestCase was added in 3.8, so skip testing on
# unsupported systems
class TestRunner(unittest.IsolatedAsyncioTestCase):
def run(self, *args: List[Any], **kwargs: List[Dict[str, Any]]) -> Any:
return super().run(*args, **kwargs)
def test_step_extraction(self) -> None:
fake_job = {
"steps": [
{
"name": "test1",
"run": "echo hi"
},
{
"name": "test2",
"run": "echo hi"
},
{
"name": "test3",
"run": "echo hi"
},
]
}
actual = actions_local_runner.grab_specific_steps(["test2"], fake_job)
expected = [
{
"name": "test2",
"run": "echo hi"
},
]
self.assertEqual(actual, expected)
async def test_runner(self) -> None:
fake_step = {
"name": "say hello",
"run": "echo hi"
}
f = io.StringIO()
with contextlib.redirect_stdout(f):
await actions_local_runner.run_steps([fake_step], "test", None)
result = f.getvalue()
self.assertIn("say hello", result)
self.assertIn("hi", result)
unittest.main()
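
Since the test case is defined under `if __name__ == '__main__':` and ends with `unittest.main()`, the file is presumably meant to be run directly (path taken from the mypy config change above):

```bash
python tools/test/test_actions_local_runner.py
```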