Refactor local lint (#58798)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58798

In #58623 there was a bug in `make quicklint` where ShellCheck would run on the entire repo when there were no files. This PR fixes that by refactoring out common stuff (like skipping quicklint when there are no files, let checks do their own file filtering) and pushes the logic into a runner class.

Test Plan: Imported from OSS

Reviewed By: samestep

Differential Revision: D28649889

Pulled By: driazati

fbshipit-source-id: b19f32cdb63396c806cb689b2f6daf97e1724d44
This commit is contained in:
driazati 2021-05-24 13:49:47 -07:00 committed by Facebook GitHub Bot
parent a7f4f80903
commit a679bb5ecf
3 changed files with 218 additions and 211 deletions

View File

@ -46,7 +46,6 @@ shellcheck:
--job 'shellcheck' \ --job 'shellcheck' \
--step 'Extract scripts from GitHub Actions workflows' --step 'Extract scripts from GitHub Actions workflows'
@$(PYTHON) tools/actions_local_runner.py \ @$(PYTHON) tools/actions_local_runner.py \
--file-filter '.sh' \
$(CHANGED_ONLY) \ $(CHANGED_ONLY) \
--job 'shellcheck' --job 'shellcheck'
@ -90,13 +89,11 @@ quick_checks:
flake8: flake8:
@$(PYTHON) tools/actions_local_runner.py \ @$(PYTHON) tools/actions_local_runner.py \
--file-filter '.py' \
$(CHANGED_ONLY) \ $(CHANGED_ONLY) \
--job 'flake8-py3' --job 'flake8-py3'
mypy: mypy:
@$(PYTHON) tools/actions_local_runner.py \ @$(PYTHON) tools/actions_local_runner.py \
--file-filter '.py' \
$(CHANGED_ONLY) \ $(CHANGED_ONLY) \
--job 'mypy' --job 'mypy'

View File

@ -13,7 +13,7 @@ import fnmatch
import shlex import shlex
import configparser import configparser
from typing import List, Dict, Any, Optional, Tuple, Union from typing import List, Dict, Any, Optional, Union, NamedTuple, Set
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@ -87,20 +87,28 @@ def find_changed_files() -> List[str]:
def print_results(job_name: str, passed: bool, streams: List[str]) -> None: def print_results(job_name: str, passed: bool, streams: List[str]) -> None:
header(job_name, passed) icon = color(col.GREEN, "") if passed else color(col.RED, "x")
print(f"{icon} {color(col.BLUE, job_name)}")
for stream in streams: for stream in streams:
stream = stream.strip() stream = stream.strip()
if stream != "": if stream != "":
print(stream) print(stream)
class CommandResult(NamedTuple):
passed: bool
stdout: str
stderr: str
async def shell_cmd( async def shell_cmd(
cmd: Union[str, List[str]], cmd: Union[str, List[str]],
env: Optional[Dict[str, Any]] = None, env: Optional[Dict[str, Any]] = None,
redirect: bool = True, redirect: bool = True,
) -> Tuple[bool, str, str]: ) -> CommandResult:
if isinstance(cmd, list): if isinstance(cmd, list):
cmd_str = ' '.join(shlex.quote(arg) for arg in cmd) cmd_str = " ".join(shlex.quote(arg) for arg in cmd)
else: else:
cmd_str = cmd cmd_str = cmd
@ -117,176 +125,191 @@ async def shell_cmd(
passed = proc.returncode == 0 passed = proc.returncode == 0
if not redirect: if not redirect:
return passed, "", "" return CommandResult(passed, "", "")
return passed, stdout.decode().strip(), stderr.decode().strip() return CommandResult(passed, stdout.decode().strip(), stderr.decode().strip())
def header(name: str, passed: bool) -> None: class Check:
PASS = color(col.GREEN, "") name: str
FAIL = color(col.RED, "x")
icon = PASS if passed else FAIL
print(f"{icon} {color(col.BLUE, name)}")
def __init__(self, files: Optional[List[str]], quiet: bool):
self.quiet = quiet
self.files = files
def get_flake_excludes() -> List[str]: async def run(self) -> bool:
config = configparser.ConfigParser() result = await self.run_helper()
config.read(os.path.join(REPO_ROOT, ".flake8")) if result is None:
excludes = re.split(r',\s*', config["flake8"]["exclude"].strip())
excludes = [e.strip() for e in excludes if e.strip() != ""]
return excludes
async def run_flake8(files: Optional[List[str]], quiet: bool) -> bool:
cmd = ["flake8"]
excludes = get_flake_excludes()
def should_include(name: str) -> bool:
for exclude in excludes:
if fnmatch.fnmatch(name, pat=exclude):
return False
if name.startswith(exclude) or ("./" + name).startswith(exclude):
return False
return True
if files is not None:
files = [f for f in files if should_include(f)]
if len(files) == 0:
print_results("flake8", True, [])
return True return True
# Running quicklint, pass in an explicit list of files (unlike mypy, streams = []
# flake8 will still use .flake8 to filter this list by the 'exclude's if not result.passed:
# in the config streams = [
cmd += files result.stderr,
result.stdout,
]
print_results(self.name, result.passed, streams)
return result.passed
passed, stdout, stderr = await shell_cmd(cmd) async def run_helper(self) -> Optional[CommandResult]:
print_results("flake8", passed, [stdout, stderr]) if self.files is not None:
return passed relevant_files = self.filter_files(self.files)
if len(relevant_files) == 0:
# No files, do nothing
return CommandResult(passed=True, stdout="", stderr="")
return await self.quick(relevant_files)
return await self.full()
def filter_ext(self, files: List[str], extensions: Set[str]) -> List[str]:
def passes(filename: str) -> bool:
return os.path.splitext(filename)[1] in extensions
return [f for f in files if passes(f)]
def filter_files(self, files: List[str]) -> List[str]:
return files
async def quick(self, files: List[str]) -> CommandResult:
raise NotImplementedError
async def full(self) -> Optional[CommandResult]:
raise NotImplementedError
async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool: class Flake8(Check):
env = os.environ.copy() name = "flake8"
if should_color():
# Secret env variable: https://github.com/python/mypy/issues/7771
env["MYPY_FORCE_COLOR"] = "1"
if files is not None: def filter_files(self, files: List[str]) -> List[str]:
# Running quick lint, use mypy-wrapper instead so it checks that the files config = configparser.ConfigParser()
# actually should be linted config.read(os.path.join(REPO_ROOT, ".flake8"))
passed, stdout, stderr = await shell_cmd( excludes = re.split(r",\s*", config["flake8"]["exclude"].strip())
[sys.executable, "tools/mypy_wrapper.py"] + [ excludes = [e.strip() for e in excludes if e.strip() != ""]
os.path.join(REPO_ROOT, f) for f in files
def should_include(name: str) -> bool:
for exclude in excludes:
if fnmatch.fnmatch(name, pat=exclude):
return False
if name.startswith(exclude) or f"./{name}".startswith(exclude):
return False
return True
files = self.filter_ext(files, {".py"})
return [f for f in files if should_include(f)]
async def quick(self, files: List[str]) -> CommandResult:
return await shell_cmd(["flake8"] + files)
async def full(self) -> CommandResult:
return await shell_cmd(["flake8"])
class Mypy(Check):
name = "mypy (skipped typestub generation)"
def filter_files(self, files: List[str]) -> List[str]:
return self.filter_ext(files, {".py", ".pyi"})
def env(self) -> Dict[str, Any]:
env = os.environ.copy()
if should_color():
# Secret env variable: https://github.com/python/mypy/issues/7771
env["MYPY_FORCE_COLOR"] = "1"
return env
async def quick(self, files: List[str]) -> CommandResult:
return await shell_cmd(
[sys.executable, "tools/mypy_wrapper.py"]
+ [os.path.join(REPO_ROOT, f) for f in files],
env=self.env(),
)
async def full(self) -> None:
env = self.env()
# hackily change the name
self.name = "mypy"
await shell_cmd(
[
sys.executable,
"tools/actions_local_runner.py",
"--job",
"mypy",
"--file",
".github/workflows/lint.yml",
"--step",
"Run autogen",
], ],
redirect=False,
env=env, env=env,
) )
print_results("mypy (skipped typestub generation)", passed, [ await shell_cmd(
stdout + "\n", [
stderr + "\n", sys.executable,
]) "tools/actions_local_runner.py",
return passed "--job",
"mypy",
# Not running quicklint, so use lint.yml "--file",
_, _, _ = await shell_cmd( ".github/workflows/lint.yml",
[ "--step",
sys.executable, "Run mypy",
"tools/actions_local_runner.py",
"--job",
"mypy",
"--file",
".github/workflows/lint.yml",
"--step",
"Run autogen",
],
redirect=False,
env=env,
)
passed, _, _ = await shell_cmd(
[
sys.executable,
"tools/actions_local_runner.py",
"--job",
"mypy",
"--file",
".github/workflows/lint.yml",
"--step",
"Run mypy",
],
redirect=False,
env=env,
)
return passed
async def run_shellcheck(files: Optional[List[str]], quiet: bool) -> bool:
if files is not None:
# The files list should already be filtered by '--file-filter ".sh"' when
# calling this script
passed, stdout, stderr = await shell_cmd(
["tools/run_shellcheck.sh"] + [
os.path.join(REPO_ROOT, f) for f in files
], ],
redirect=False,
env=env,
) )
print_results("shellcheck: Run ShellCheck", passed, [
stdout + "\n",
stderr + "\n",
])
return passed
# Not running quicklint, so use lint.yml
passed, _, _ = await shell_cmd(
[
sys.executable,
"tools/actions_local_runner.py",
"--job",
"shellcheck",
"--file",
".github/workflows/lint.yml",
"--step",
"Run ShellCheck",
],
redirect=False,
)
return passed
async def run_step( class ShellCheck(Check):
step: Dict[str, Any], job_name: str, files: Optional[List[str]], quiet: bool name = "shellcheck: Run ShellCheck"
) -> bool:
env = os.environ.copy()
env["GITHUB_WORKSPACE"] = "/tmp"
script = step["run"]
if quiet: def filter_files(self, files: List[str]) -> List[str]:
# TODO: Either lint that GHA scripts only use 'set -eux' or make this more return self.filter_ext(files, {".sh"})
# resilient
script = script.replace("set -eux", "set -eu")
script = re.sub(r"^time ", "", script, flags=re.MULTILINE)
name = f'{job_name}: {step["name"]}'
passed, stderr, stdout = await shell_cmd(script, env=env) async def quick(self, files: List[str]) -> CommandResult:
if not passed: return await shell_cmd(
print_results(name, passed, [stdout, stderr]) ["tools/run_shellcheck.sh"] + [os.path.join(REPO_ROOT, f) for f in files],
else: )
print_results(name, passed, [])
return passed async def full(self) -> None:
await shell_cmd(
[
sys.executable,
"tools/actions_local_runner.py",
"--job",
"shellcheck",
"--file",
".github/workflows/lint.yml",
"--step",
"Run ShellCheck",
],
redirect=False,
)
async def run_steps( class YamlStep(Check):
steps: List[Dict[str, Any]], job_name: str, files: Optional[List[str]], quiet: bool def __init__(self, step: Dict[str, Any], job_name: str, quiet: bool):
) -> bool: super().__init__(files=None, quiet=quiet)
coros = [run_step(step, job_name, files, quiet) for step in steps] self.step = step
return all(await asyncio.gather(*coros)) self.name = f'{job_name}: {self.step["name"]}'
async def full(self) -> CommandResult:
env = os.environ.copy()
env["GITHUB_WORKSPACE"] = "/tmp"
script = self.step["run"]
if self.quiet:
# TODO: Either lint that GHA scripts only use 'set -eux' or make this more
# resilient
script = script.replace("set -eux", "set -eu")
script = re.sub(r"^time ", "", script, flags=re.MULTILINE)
return await shell_cmd(script, env=env)
def relevant_changed_files(file_filters: Optional[List[str]]) -> Optional[List[str]]: def changed_files() -> Optional[List[str]]:
changed_files: Optional[List[str]] = None changed_files: Optional[List[str]] = None
try: try:
changed_files = sorted(find_changed_files()) changed_files = sorted(find_changed_files())
@ -298,16 +321,7 @@ def relevant_changed_files(file_filters: Optional[List[str]]) -> Optional[List[s
) )
return None return None
if file_filters is None: return changed_files
return changed_files
else:
relevant_files = []
for f in changed_files:
for file_filter in file_filters:
if f.endswith(file_filter):
relevant_files.append(f)
break
return relevant_files
def grab_specific_steps( def grab_specific_steps(
@ -331,11 +345,6 @@ def main() -> None:
description="Pull shell scripts out of GitHub actions and run them" description="Pull shell scripts out of GitHub actions and run them"
) )
parser.add_argument("--file", help="YAML file with actions") parser.add_argument("--file", help="YAML file with actions")
parser.add_argument(
"--file-filter",
help="only pass through files with this extension",
nargs="*",
)
parser.add_argument( parser.add_argument(
"--changed-only", "--changed-only",
help="only run on changed files", help="only run on changed files",
@ -349,12 +358,8 @@ def main() -> None:
parser.add_argument("--step", action="append", help="steps to run (in order)") parser.add_argument("--step", action="append", help="steps to run (in order)")
args = parser.parse_args() args = parser.parse_args()
relevant_files = None
quiet = not args.no_quiet quiet = not args.no_quiet
if args.changed_only:
relevant_files = relevant_changed_files(args.file_filter)
if args.file is None: if args.file is None:
# If there is no .yml file provided, fall back to the list of known # If there is no .yml file provided, fall back to the list of known
# jobs. We use this for flake8 and mypy since they run different # jobs. We use this for flake8 and mypy since they run different
@ -363,7 +368,12 @@ def main() -> None:
raise RuntimeError( raise RuntimeError(
f"Job {args.job} not found and no .yml file was provided" f"Job {args.job} not found and no .yml file was provided"
) )
future = ad_hoc_steps[args.job](relevant_files, quiet)
files = None
if args.changed_only:
files = changed_files()
checks = [ad_hoc_steps[args.job](files, quiet)]
else: else:
if args.step is None: if args.step is None:
raise RuntimeError("1+ --steps must be provided") raise RuntimeError("1+ --steps must be provided")
@ -380,18 +390,21 @@ def main() -> None:
# Pull the relevant sections out of the provided .yml file and run them # Pull the relevant sections out of the provided .yml file and run them
relevant_steps = grab_specific_steps(args.step, job) relevant_steps = grab_specific_steps(args.step, job)
future = run_steps(relevant_steps, args.job, relevant_files, quiet) checks = [
YamlStep(step=step, job_name=args.job, quiet=quiet)
for step in relevant_steps
]
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.run_until_complete(future) loop.run_until_complete(asyncio.gather(*[check.run() for check in checks]))
# These are run differently locally in order to enable quicklint, so dispatch # These are run differently locally in order to enable quicklint, so dispatch
# out to special handlers instead of using lint.yml # out to special handlers instead of using lint.yml
ad_hoc_steps = { ad_hoc_steps = {
"mypy": run_mypy, "mypy": Mypy,
"flake8-py3": run_flake8, "flake8-py3": Flake8,
"shellcheck": run_shellcheck, "shellcheck": ShellCheck,
} }
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -24,38 +24,23 @@ if sys.version_info >= (3, 8):
def test_step_extraction(self) -> None: def test_step_extraction(self) -> None:
fake_job = { fake_job = {
"steps": [ "steps": [
{ {"name": "test1", "run": "echo hi"},
"name": "test1", {"name": "test2", "run": "echo hi"},
"run": "echo hi" {"name": "test3", "run": "echo hi"},
},
{
"name": "test2",
"run": "echo hi"
},
{
"name": "test3",
"run": "echo hi"
},
] ]
} }
actual = actions_local_runner.grab_specific_steps(["test2"], fake_job) actual = actions_local_runner.grab_specific_steps(["test2"], fake_job)
expected = [ expected = [
{ {"name": "test2", "run": "echo hi"},
"name": "test2",
"run": "echo hi"
},
] ]
self.assertEqual(actual, expected) self.assertEqual(actual, expected)
async def test_runner(self) -> None: async def test_runner(self) -> None:
fake_step = { fake_step = {"name": "say hello", "run": "echo hi"}
"name": "say hello",
"run": "echo hi"
}
f = io.StringIO() f = io.StringIO()
with contextlib.redirect_stdout(f): with contextlib.redirect_stdout(f):
await actions_local_runner.run_steps([fake_step], "test", None, True) await actions_local_runner.YamlStep(fake_step, "test", True).run()
result = f.getvalue() result = f.getvalue()
self.assertIn("say hello", result) self.assertIn("say hello", result)
@ -80,7 +65,9 @@ if sys.version_info >= (3, 8):
def test_lint(self): def test_lint(self):
cmd = ["make", "lint", "-j", str(multiprocessing.cpu_count())] cmd = ["make", "lint", "-j", str(multiprocessing.cpu_count())]
proc = subprocess.run(cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE) proc = subprocess.run(
cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE
)
stdout = proc.stdout.decode() stdout = proc.stdout.decode()
for line in self.expected: for line in self.expected:
@ -90,7 +77,9 @@ if sys.version_info >= (3, 8):
def test_quicklint(self): def test_quicklint(self):
cmd = ["make", "quicklint", "-j", str(multiprocessing.cpu_count())] cmd = ["make", "quicklint", "-j", str(multiprocessing.cpu_count())]
proc = subprocess.run(cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE) proc = subprocess.run(
cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE
)
stdout = proc.stdout.decode() stdout = proc.stdout.decode()
for line in self.expected: for line in self.expected:
@ -99,7 +88,6 @@ if sys.version_info >= (3, 8):
# TODO: See https://github.com/pytorch/pytorch/issues/57967 # TODO: See https://github.com/pytorch/pytorch/issues/57967
self.assertIn("mypy (skipped typestub generation)", stdout) self.assertIn("mypy (skipped typestub generation)", stdout)
class TestQuicklint(unittest.IsolatedAsyncioTestCase): class TestQuicklint(unittest.IsolatedAsyncioTestCase):
test_files = [ test_files = [
os.path.join("caffe2", "some_cool_file.py"), os.path.join("caffe2", "some_cool_file.py"),
@ -108,17 +96,21 @@ if sys.version_info >= (3, 8):
os.path.join("torch", "some_stubs.pyi"), os.path.join("torch", "some_stubs.pyi"),
os.path.join("test.sh"), os.path.join("test.sh"),
] ]
test_py_files = [f for f in test_files if f.endswith(".py") or f.endswith(".pyi")] test_py_files = [
f for f in test_files if f.endswith(".py") or f.endswith(".pyi")
]
test_sh_files = [f for f in test_files if f.endswith(".sh")] test_sh_files = [f for f in test_files if f.endswith(".sh")]
maxDiff = None maxDiff = None
def setUp(self, *args, **kwargs): def setUp(self, *args, **kwargs):
for name in self.test_files: for name in self.test_files:
bad_code = textwrap.dedent(""" bad_code = textwrap.dedent(
"""
some_variable = '2' some_variable = '2'
some_variable = None some_variable = None
some_variable = 11.2 some_variable = 11.2
""").rstrip("\n") """
).rstrip("\n")
with open(name, "w") as f: with open(name, "w") as f:
f.write(bad_code) f.write(bad_code)
@ -135,20 +127,22 @@ if sys.version_info >= (3, 8):
async def test_flake8(self): async def test_flake8(self):
f = io.StringIO() f = io.StringIO()
with contextlib.redirect_stdout(f): with contextlib.redirect_stdout(f):
await actions_local_runner.run_flake8(self.test_py_files, True) await actions_local_runner.Flake8(self.test_py_files, True).run()
# Should exclude the caffe2/ file # Should exclude the caffe2/ file
expected = textwrap.dedent(""" expected = textwrap.dedent(
"""
x flake8 x flake8
torch/some_cool_file.py:4:21: W292 no newline at end of file torch/some_cool_file.py:4:21: W292 no newline at end of file
aten/some_cool_file.py:4:21: W292 no newline at end of file aten/some_cool_file.py:4:21: W292 no newline at end of file
""").lstrip("\n") """
).lstrip("\n")
self.assertEqual(expected, f.getvalue()) self.assertEqual(expected, f.getvalue())
async def test_shellcheck(self): async def test_shellcheck(self):
f = io.StringIO() f = io.StringIO()
with contextlib.redirect_stdout(f): with contextlib.redirect_stdout(f):
await actions_local_runner.run_shellcheck(self.test_sh_files, True) await actions_local_runner.ShellCheck(self.test_sh_files, True).run()
self.assertIn("SC2148: Tips depend on target shell", f.getvalue()) self.assertIn("SC2148: Tips depend on target shell", f.getvalue())
self.assertIn("SC2283: Remove spaces around = to assign", f.getvalue()) self.assertIn("SC2283: Remove spaces around = to assign", f.getvalue())
@ -173,12 +167,12 @@ if sys.version_info >= (3, 8):
redirect=True, redirect=True,
) )
await actions_local_runner.run_mypy(self.test_py_files, True) await actions_local_runner.Mypy(self.test_py_files, True).run()
# Should exclude the aten/ file; also, apparently mypy # Should exclude the aten/ file; also, apparently mypy
# typechecks files in reverse order # typechecks files in reverse order
expected = textwrap.dedent(""" expected = textwrap.dedent(
"""
x mypy (skipped typestub generation) x mypy (skipped typestub generation)
torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment] torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment] torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
@ -186,9 +180,12 @@ if sys.version_info >= (3, 8):
torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment] torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment] caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment] caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
""").lstrip("\n") # noqa: B950 """ # noqa: B950
).lstrip(
"\n"
)
self.assertEqual(expected, f.getvalue()) self.assertEqual(expected, f.getvalue())
if __name__ == '__main__': if __name__ == "__main__":
unittest.main() unittest.main()