Make mypy_wrapper.py accept multiple filenames (#57998)

Summary:
A followup to https://github.com/pytorch/pytorch/issues/57752.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/57998

Test Plan:
```
mypy --config=mypy-strict.ini
python tools/test/test_mypy_wrapper.py
python tools/test/test_actions_local_runner.py -k mypy
```

Reviewed By: driazati

Differential Revision: D28338531

Pulled By: samestep

fbshipit-source-id: ae31e3fa4a2b8060c200f9a13f768beaf2f55694
This commit is contained in:
Sam Estep 2021-05-11 15:50:51 -07:00 committed by Facebook GitHub Bot
parent f9c8b7f1a8
commit c36055bb42
5 changed files with 301 additions and 145 deletions

View File

@ -8,6 +8,7 @@ cache_dir = .mypy_cache/normal
warn_unused_configs = True warn_unused_configs = True
warn_redundant_casts = True warn_redundant_casts = True
show_error_codes = True show_error_codes = True
show_column_numbers = True
check_untyped_defs = True check_untyped_defs = True
follow_imports = silent follow_imports = silent

View File

@ -173,28 +173,18 @@ async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
if files is not None: if files is not None:
# Running quick lint, use mypy-wrapper instead so it checks that the files # Running quick lint, use mypy-wrapper instead so it checks that the files
# actually should be linted # actually should be linted
stdout = ""
stderr = ""
passed = True
# Pass each file to the mypy_wrapper script passed, stdout, stderr = await shell_cmd(
# TODO: Fix mypy wrapper to mock mypy's args and take in N files instead [sys.executable, "tools/mypy_wrapper.py"] + [
# of just 1 at a time os.path.join(REPO_ROOT, f) for f in files
for f in files: ],
f = os.path.join(REPO_ROOT, f)
f_passed, f_stdout, f_stderr = await shell_cmd(
[sys.executable, "tools/mypy_wrapper.py", f],
env=env, env=env,
) )
if not f_passed:
passed = False
if f_stdout != "": print_results("mypy (skipped typestub generation)", passed, [
stdout += f_stdout + "\n" stdout + "\n",
if f_stderr != "": stderr + "\n",
stderr += f_stderr + "\n" ])
print_results("mypy (skipped typestub generation)", passed, [stdout, stderr])
return passed return passed
# Not running quicklint, so use lint.yml # Not running quicklint, so use lint.yml

View File

@ -18,11 +18,11 @@ See also these wiki pages:
- https://github.com/pytorch/pytorch/wiki/Lint-as-you-type - https://github.com/pytorch/pytorch/wiki/Lint-as-you-type
""" """
import re
import sys import sys
from collections import defaultdict
from configparser import ConfigParser from configparser import ConfigParser
from pathlib import Path, PurePath from pathlib import Path, PurePath, PurePosixPath
from typing import List, Set from typing import Any, Dict, List, Optional, Set, Tuple
import mypy.api import mypy.api
# not part of the public API, but this is the easiest way to ensure that # not part of the public API, but this is the easiest way to ensure that
@ -30,35 +30,144 @@ import mypy.api
import mypy.config_parser import mypy.config_parser
def config_files() -> Set[str]: def read_config(config_path: Path) -> Set[str]:
""" """
Return a set of the names of all the PyTorch mypy config files. Return the set of `files` in the `mypy` ini file at config_path.
"""
return {str(p) for p in Path().glob('mypy*.ini')}
def is_match(*, pattern: str, filename: str) -> bool:
"""
Return True iff the filename matches the (mypy ini) glob pattern.
"""
for path in mypy.config_parser.split_and_match_files(pattern):
path = PurePath(path).as_posix()
if filename == path or filename.startswith(f'{path}/'):
return True
return False
def in_files(*, ini: str, py: str) -> bool:
"""
Return True iff the py file is included in the ini file's "files".
""" """
config = ConfigParser() config = ConfigParser()
config.read(config_path)
# hopefully on Windows this gives posix paths
return set(mypy.config_parser.split_and_match_files(
config['mypy']['files'],
))
# see tools/test/test_mypy_wrapper.py for examples of many of the
# following functions
def config_files() -> Dict[str, Set[str]]:
"""
Return a dict from all our `mypy` ini filenames to their `files`.
"""
return {str(ini): read_config(ini) for ini in Path().glob('mypy*.ini')}
def split_path(path: str) -> List[str]:
"""
Split a relative (not absolute) POSIX path into its segments.
"""
pure = PurePosixPath(path)
return [str(p.name) for p in list(reversed(pure.parents))[1:] + [pure]]
# mypy doesn't support recursive types yet
# https://github.com/python/mypy/issues/731
# but if it did, the `Any` here would be `Union[Set[str], 'Trie']`,
# although that is not completely accurate: specifically, every `None`
# key must map to a `Set[str]`, and every `str` key must map to a `Trie`
Trie = Dict[Optional[str], Any]
def make_trie(configs: Dict[str, Set[str]]) -> Trie:
"""
Return a trie from path prefixes to their `mypy` configs.
Specifically, each layer of the trie represents a segment of a POSIX
path relative to the root of this repo. If you follow a path down
the trie and reach a `None` key, that `None` maps to the (nonempty)
set of keys in `configs` which explicitly include that path.
"""
trie: Trie = {}
for ini, files in configs.items():
for f in files:
inner = trie
for segment in split_path(f):
inner = inner.setdefault(segment, {})
inner.setdefault(None, set()).add(ini)
return trie
def lookup(trie: Trie, filename: str) -> Set[str]:
"""
Return the configs in `trie` that include a prefix of `filename`.
A path is included by a config if any of its ancestors are included
by the wildcard-expanded version of that config's `files`. Thus,
this function follows `filename`'s path down the `trie` and
accumulates all the configs it finds along the way.
"""
configs = set()
inner = trie
for segment in split_path(filename):
inner = inner.get(segment, {})
configs |= inner.get(None, set())
return configs
def make_plan(
*,
configs: Dict[str, Set[str]],
files: List[str]
) -> Dict[str, List[str]]:
"""
Return a dict from config names to the files to run them with.
The keys of the returned dict are a subset of the keys of `configs`.
The list of files in each value of returned dict should contain a
nonempty subset of the given `files`, in the same order as `files`.
"""
trie = make_trie(configs)
plan = defaultdict(list)
for filename in files:
for config in lookup(trie, filename):
plan[config].append(filename)
return plan
def run(*, args: List[str], files: List[str]) -> Tuple[int, List[str]]:
"""
Return the exit code and list of output lines from running `mypy`.
The given `args` are passed verbatim to `mypy`. The `files` (each of
which must be an absolute path) are converted to relative paths
(that is, relative to the root of this repo) and then classified
according to which ones need to be run with each `mypy` config.
Thus, `mypy` may be run zero, one, or multiple times, but it will be
run at most once for each `mypy` config used by this repo.
"""
repo_root = Path.cwd() repo_root = Path.cwd()
filename = PurePath(py).relative_to(repo_root).as_posix() plan = make_plan(configs=config_files(), files=[
config.read(repo_root / ini) PurePath(f).relative_to(repo_root).as_posix() for f in files
return any( ])
is_match(pattern=pattern, filename=filename) mypy_results = [
for pattern in re.split(r',\s*', config['mypy']['files'].strip()) mypy.api.run(
# insert custom flags after args to avoid being overridden
# by existing flags in args
args + [
# don't special-case the last line
'--no-error-summary',
f'--config-file={config}',
] + filtered
)
# by construction, filtered must be nonempty
for config, filtered in plan.items()
]
return (
# assume all mypy exit codes are nonnegative
# https://github.com/python/mypy/issues/6003
max(
[exit_code for _, _, exit_code in mypy_results],
default=0,
),
list(dict.fromkeys( # remove duplicates, retain order
item
# assume stderr is empty
# https://github.com/python/mypy/issues/1051
for stdout, _, _ in mypy_results
for item in stdout.splitlines()
)),
) )
@ -70,7 +179,7 @@ def main(args: List[str]) -> None:
- the cwd is set to the root of this cloned repo - the cwd is set to the root of this cloned repo
- args is a valid list of CLI arguments that could be passed to mypy - args is a valid list of CLI arguments that could be passed to mypy
- last element of args is an absolute path to a file to typecheck - some of args are absolute paths to files to typecheck
- all the other args are config flags for mypy, rather than files - all the other args are config flags for mypy, rather than files
These assumptions hold, for instance, when mypy is run automatically These assumptions hold, for instance, when mypy is run automatically
@ -89,43 +198,17 @@ def main(args: List[str]) -> None:
} }
More generally, this should work for any editor sets the cwd to the More generally, this should work for any editor sets the cwd to the
repo root, runs mypy on one file at a time via its absolute path, repo root, runs mypy on individual files via their absolute paths,
and allows you to set the path to the mypy executable. and allows you to set the path to the mypy executable.
""" """
if not args: repo_root = str(Path.cwd())
sys.exit('The PyTorch mypy wrapper must be passed exactly one file.') exit_code, mypy_issues = run(
configs = [f for f in config_files() if in_files(ini=f, py=args[-1])] args=[arg for arg in args if not arg.startswith(repo_root)],
mypy_results = [ files=[arg for arg in args if arg.startswith(repo_root)],
mypy.api.run(
# insert right before args[-1] to avoid being overridden
# by existing flags in args[:-1]
args[:-1] + [
# uniform, in case some configs set these and some don't
'--show-error-codes',
'--show-column-numbers',
# don't special-case the last line
'--no-error-summary',
f'--config-file={config}',
args[-1],
]
) )
for config in configs
]
mypy_issues = list(dict.fromkeys( # remove duplicates, retain order
item
# assume stderr is empty
# https://github.com/python/mypy/issues/1051
for stdout, _, _ in mypy_results
for item in stdout.splitlines()
))
for issue in mypy_issues: for issue in mypy_issues:
print(issue) print(issue)
# assume all mypy exit codes are nonnegative sys.exit(exit_code)
# https://github.com/python/mypy/issues/6003
sys.exit(max(
[exit_code for _, _, exit_code in mypy_results],
default=0,
))
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -164,15 +164,16 @@ if sys.version_info >= (3, 8):
await actions_local_runner.run_mypy(self.test_files, True) await actions_local_runner.run_mypy(self.test_files, True)
# Should exclude the aten/ file # Should exclude the aten/ file; also, apparently mypy
# typechecks files in reverse order
expected = textwrap.dedent(""" expected = textwrap.dedent("""
x mypy (skipped typestub generation) x mypy (skipped typestub generation)
caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
torch/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment] torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment] torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
torch/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str") [assignment]
caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str") [assignment]
""").lstrip("\n") # noqa: B950 """).lstrip("\n") # noqa: B950
self.assertEqual(expected, f.getvalue()) self.assertEqual(expected, f.getvalue())

View File

@ -4,73 +4,154 @@ from tools import mypy_wrapper
class TestMypyWrapper(unittest.TestCase): class TestMypyWrapper(unittest.TestCase):
configs = {
'foo.ini': {
'file1.abc',
'dir2',
'dir3/file4.xyz',
},
'bar/baz.ini': {
'file1.abc',
'dir2/dir5/file6.def',
'dir3/file7.abc',
},
}
trie: mypy_wrapper.Trie = {
'file1.abc': {None: {'foo.ini', 'bar/baz.ini'}},
'dir2': {
None: {'foo.ini'},
'dir5': {'file6.def': {None: {'bar/baz.ini'}}},
},
'dir3': {
'file4.xyz': {None: {'foo.ini'}},
'file7.abc': {None: {'bar/baz.ini'}},
},
}
def test_config_files(self) -> None: def test_config_files(self) -> None:
self.assertEqual(mypy_wrapper.config_files(), { self.assertEqual(mypy_wrapper.config_files().keys(), {
'mypy.ini', 'mypy.ini',
'mypy-strict.ini', 'mypy-strict.ini',
}) })
def test_is_match_can_match_individual_files(self) -> None: def test_split_path(self) -> None:
self.assertTrue(mypy_wrapper.is_match( self.assertEqual(mypy_wrapper.split_path('file1.abc'), ['file1.abc'])
pattern='test/test_torch.py', self.assertEqual(
filename='test/test_torch.py', mypy_wrapper.split_path('dir3/file4.xyz'),
)) ['dir3', 'file4.xyz'],
self.assertFalse(mypy_wrapper.is_match( )
pattern='test/test_torch.py', self.assertEqual(
filename='test/test_testing.py', mypy_wrapper.split_path('dir2/dir5/file6.def'),
)) ['dir2', 'dir5', 'file6.def'],
)
def test_is_match_dir_matters(self) -> None: def test_make_trie(self) -> None:
self.assertFalse(mypy_wrapper.is_match( self.assertEqual(mypy_wrapper.make_trie(self.configs), self.trie)
pattern='tools/codegen/utils.py',
filename='torch/nn/modules.py',
))
self.assertTrue(mypy_wrapper.is_match(
pattern='setup.py',
filename='setup.py',
))
self.assertFalse(mypy_wrapper.is_match(
pattern='setup.py',
filename='foo/setup.py',
))
self.assertTrue(mypy_wrapper.is_match(
pattern='foo/setup.py',
filename='foo/setup.py',
))
def test_is_match_can_match_dirs(self) -> None: def test_lookup(self) -> None:
self.assertTrue(mypy_wrapper.is_match( self.assertEqual(
pattern='torch', mypy_wrapper.lookup(self.trie, 'file1.abc'),
filename='torch/random.py', {'foo.ini', 'bar/baz.ini'},
)) )
self.assertTrue(mypy_wrapper.is_match( self.assertEqual(
pattern='torch', mypy_wrapper.lookup(self.trie, 'dir2/dir5/file6.def'),
filename='torch/nn/cpp.py', {'foo.ini', 'bar/baz.ini'},
)) )
self.assertFalse(mypy_wrapper.is_match( self.assertEqual(
pattern='torch', mypy_wrapper.lookup(self.trie, 'dir3/file4.xyz'),
filename='tools/fast_nvcc/fast_nvcc.py', {'foo.ini'},
)) )
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'dir3/file7.abc'),
{'bar/baz.ini'},
)
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'file8.xyz'),
set(),
)
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'dir2/dir9/file10.abc'),
{'foo.ini'},
)
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'dir3/file11.abc'),
set(),
)
def test_is_match_can_match_wildcards(self) -> None: # non-leaves shouldn't ever be passed to lookup in practice, but
self.assertTrue(mypy_wrapper.is_match( # still, good to consider/test these cases
pattern='tools/autograd/*.py', self.assertEqual(
filename='tools/autograd/gen_autograd.py', mypy_wrapper.lookup(self.trie, 'dir2'),
)) {'foo.ini'},
self.assertFalse(mypy_wrapper.is_match( )
pattern='tools/autograd/*.py', self.assertEqual(
filename='tools/autograd/deprecated.yaml', mypy_wrapper.lookup(self.trie, 'dir2/dir5'),
)) {'foo.ini'},
)
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'dir3'),
set(),
)
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'dir2/dir9'),
{'foo.ini'},
)
self.assertEqual(
mypy_wrapper.lookup(self.trie, 'dir4'),
set(),
)
def test_is_match_wildcards_dont_expand_or_collapse(self) -> None: def test_make_plan(self) -> None:
self.assertFalse(mypy_wrapper.is_match( self.assertEqual(
pattern='benchmarks/instruction_counts/*.py', mypy_wrapper.make_plan(configs=self.configs, files=[
filename='benchmarks/instruction_counts/core/utils.py', 'file8.xyz',
)) 'dir3/file11.abc',
self.assertFalse(mypy_wrapper.is_match( ]),
pattern='benchmarks/instruction_counts/*/*.py', {}
filename='benchmarks/instruction_counts/main.py', )
)) self.assertEqual(
mypy_wrapper.make_plan(configs=self.configs, files=[
'file8.xyz',
'dir2/dir9/file10.abc',
'dir3/file4.xyz',
'dir3/file11.abc',
]),
{
'foo.ini': ['dir2/dir9/file10.abc', 'dir3/file4.xyz'],
}
)
self.assertEqual(
mypy_wrapper.make_plan(configs=self.configs, files=[
'file8.xyz',
'dir3/file11.abc',
'dir3/file7.abc',
]),
{
'bar/baz.ini': ['dir3/file7.abc'],
}
)
self.assertEqual(
mypy_wrapper.make_plan(configs=self.configs, files=[
'dir2/dir9/file10.abc',
'dir2/dir5/file6.def',
'dir3/file7.abc',
'file1.abc',
'dir3/file11.abc',
]),
{
'foo.ini': [
'dir2/dir9/file10.abc',
'dir2/dir5/file6.def',
'file1.abc',
],
'bar/baz.ini': [
'dir2/dir5/file6.def',
'dir3/file7.abc',
'file1.abc',
],
}
)
if __name__ == '__main__': if __name__ == '__main__':