Make mypy_wrapper.py accept multiple filenames (#57998)

Summary: A followup to https://github.com/pytorch/pytorch/issues/57752. Pull Request resolved: https://github.com/pytorch/pytorch/pull/57998 Test Plan: ``` mypy --config=mypy-strict.ini python tools/test/test_mypy_wrapper.py python tools/test/test_actions_local_runner.py -k mypy ``` Reviewed By: driazati Differential Revision: D28338531 Pulled By: samestep fbshipit-source-id: ae31e3fa4a2b8060c200f9a13f768beaf2f55694
2025-12-06 12:20:52 +01:00 · 2021-05-11 15:50:51 -07:00 · 2021-05-11 15:50:51 -07:00 · c36055bb42
commit c36055bb42
parent f9c8b7f1a8
5 changed files with 301 additions and 145 deletions
--- a/mypy.ini
+++ b/mypy.ini
@ -8,6 +8,7 @@ cache_dir = .mypy_cache/normal
 warn_unused_configs = True
 warn_redundant_casts = True
 show_error_codes = True
 show_column_numbers = True
 check_untyped_defs = True
 follow_imports = silent
--- a/tools/actions_local_runner.py
+++ b/tools/actions_local_runner.py
@ -173,28 +173,18 @@ async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
    if files is not None:
        # Running quick lint, use mypy-wrapper instead so it checks that the files
        # actually should be linted
        stdout = ""
        stderr = ""
        passed = True
-        # Pass each file to the mypy_wrapper script
+        passed, stdout, stderr = await shell_cmd(
-        # TODO: Fix mypy wrapper to mock mypy's args and take in N files instead
+            [sys.executable, "tools/mypy_wrapper.py"] + [
-        # of just 1 at a time
+                os.path.join(REPO_ROOT, f) for f in files
-        for f in files:
+            ],
            f = os.path.join(REPO_ROOT, f)
            f_passed, f_stdout, f_stderr = await shell_cmd(
                [sys.executable, "tools/mypy_wrapper.py", f],
            env=env,
        )
            if not f_passed:
                passed = False
-            if f_stdout != "":
+        print_results("mypy (skipped typestub generation)", passed, [
-                stdout += f_stdout + "\n"
+            stdout + "\n",
-            if f_stderr != "":
+            stderr + "\n",
-                stderr += f_stderr + "\n"
+        ])
        print_results("mypy (skipped typestub generation)", passed, [stdout, stderr])
        return passed
    # Not running quicklint, so use lint.yml
--- a/tools/mypy_wrapper.py
+++ b/tools/mypy_wrapper.py
@ -18,11 +18,11 @@ See also these wiki pages:
 - https://github.com/pytorch/pytorch/wiki/Lint-as-you-type
 """
 import re
 import sys
 from collections import defaultdict
 from configparser import ConfigParser
-from pathlib import Path, PurePath
+from pathlib import Path, PurePath, PurePosixPath
-from typing import List, Set
+from typing import Any, Dict, List, Optional, Set, Tuple
 import mypy.api
 # not part of the public API, but this is the easiest way to ensure that
@ -30,35 +30,144 @@ import mypy.api
 import mypy.config_parser
-def config_files() -> Set[str]:
+def read_config(config_path: Path) -> Set[str]:
    """
-    Return a set of the names of all the PyTorch mypy config files.
+    Return the set of `files` in the `mypy` ini file at config_path.
    """
    return {str(p) for p in Path().glob('mypy*.ini')}
 def is_match(*, pattern: str, filename: str) -> bool:
    """
    Return True iff the filename matches the (mypy ini) glob pattern.
    """
    for path in mypy.config_parser.split_and_match_files(pattern):
        path = PurePath(path).as_posix()
        if filename == path or filename.startswith(f'{path}/'):
            return True
    return False
 def in_files(*, ini: str, py: str) -> bool:
    """
    Return True iff the py file is included in the ini file's "files".
    """
    config = ConfigParser()
    config.read(config_path)
    # hopefully on Windows this gives posix paths
    return set(mypy.config_parser.split_and_match_files(
        config['mypy']['files'],
    ))
 # see tools/test/test_mypy_wrapper.py for examples of many of the
 # following functions
 def config_files() -> Dict[str, Set[str]]:
    """
    Return a dict from all our `mypy` ini filenames to their `files`.
    """
    return {str(ini): read_config(ini) for ini in Path().glob('mypy*.ini')}
 def split_path(path: str) -> List[str]:
    """
    Split a relative (not absolute) POSIX path into its segments.
    """
    pure = PurePosixPath(path)
    return [str(p.name) for p in list(reversed(pure.parents))[1:] + [pure]]
 # mypy doesn't support recursive types yet
 # https://github.com/python/mypy/issues/731
 # but if it did, the `Any` here would be `Union[Set[str], 'Trie']`,
 # although that is not completely accurate: specifically, every `None`
 # key must map to a `Set[str]`, and every `str` key must map to a `Trie`
 Trie = Dict[Optional[str], Any]
 def make_trie(configs: Dict[str, Set[str]]) -> Trie:
    """
    Return a trie from path prefixes to their `mypy` configs.
    Specifically, each layer of the trie represents a segment of a POSIX
    path relative to the root of this repo. If you follow a path down
    the trie and reach a `None` key, that `None` maps to the (nonempty)
    set of keys in `configs` which explicitly include that path.
    """
    trie: Trie = {}
    for ini, files in configs.items():
        for f in files:
            inner = trie
            for segment in split_path(f):
                inner = inner.setdefault(segment, {})
            inner.setdefault(None, set()).add(ini)
    return trie
 def lookup(trie: Trie, filename: str) -> Set[str]:
    """
    Return the configs in `trie` that include a prefix of `filename`.
    A path is included by a config if any of its ancestors are included
    by the wildcard-expanded version of that config's `files`. Thus,
    this function follows `filename`'s path down the `trie` and
    accumulates all the configs it finds along the way.
    """
    configs = set()
    inner = trie
    for segment in split_path(filename):
        inner = inner.get(segment, {})
        configs |= inner.get(None, set())
    return configs
 def make_plan(
    *,
    configs: Dict[str, Set[str]],
    files: List[str]
 ) -> Dict[str, List[str]]:
    """
    Return a dict from config names to the files to run them with.
    The keys of the returned dict are a subset of the keys of `configs`.
    The list of files in each value of returned dict should contain a
    nonempty subset of the given `files`, in the same order as `files`.
    """
    trie = make_trie(configs)
    plan = defaultdict(list)
    for filename in files:
        for config in lookup(trie, filename):
            plan[config].append(filename)
    return plan
 def run(*, args: List[str], files: List[str]) -> Tuple[int, List[str]]:
    """
    Return the exit code and list of output lines from running `mypy`.
    The given `args` are passed verbatim to `mypy`. The `files` (each of
    which must be an absolute path) are converted to relative paths
    (that is, relative to the root of this repo) and then classified
    according to which ones need to be run with each `mypy` config.
    Thus, `mypy` may be run zero, one, or multiple times, but it will be
    run at most once for each `mypy` config used by this repo.
    """
    repo_root = Path.cwd()
-    filename = PurePath(py).relative_to(repo_root).as_posix()
+    plan = make_plan(configs=config_files(), files=[
-    config.read(repo_root / ini)
+        PurePath(f).relative_to(repo_root).as_posix() for f in files
-    return any(
+    ])
-        is_match(pattern=pattern, filename=filename)
+    mypy_results = [
-        for pattern in re.split(r',\s*', config['mypy']['files'].strip())
+        mypy.api.run(
            # insert custom flags after args to avoid being overridden
            # by existing flags in args
            args + [
                # don't special-case the last line
                '--no-error-summary',
                f'--config-file={config}',
            ] + filtered
        )
        # by construction, filtered must be nonempty
        for config, filtered in plan.items()
    ]
    return (
        # assume all mypy exit codes are nonnegative
        # https://github.com/python/mypy/issues/6003
        max(
            [exit_code for _, _, exit_code in mypy_results],
            default=0,
        ),
        list(dict.fromkeys(  # remove duplicates, retain order
            item
            # assume stderr is empty
            # https://github.com/python/mypy/issues/1051
            for stdout, _, _ in mypy_results
            for item in stdout.splitlines()
        )),
    )
@ -70,7 +179,7 @@ def main(args: List[str]) -> None:
    - the cwd is set to the root of this cloned repo
    - args is a valid list of CLI arguments that could be passed to mypy
-    - last element of args is an absolute path to a file to typecheck
+    - some of args are absolute paths to files to typecheck
    - all the other args are config flags for mypy, rather than files
    These assumptions hold, for instance, when mypy is run automatically
@ -89,43 +198,17 @@ def main(args: List[str]) -> None:
        }
    More generally, this should work for any editor sets the cwd to the
-    repo root, runs mypy on one file at a time via its absolute path,
+    repo root, runs mypy on individual files via their absolute paths,
    and allows you to set the path to the mypy executable.
    """
-    if not args:
+    repo_root = str(Path.cwd())
-        sys.exit('The PyTorch mypy wrapper must be passed exactly one file.')
+    exit_code, mypy_issues = run(
-    configs = [f for f in config_files() if in_files(ini=f, py=args[-1])]
+        args=[arg for arg in args if not arg.startswith(repo_root)],
-    mypy_results = [
+        files=[arg for arg in args if arg.startswith(repo_root)],
        mypy.api.run(
            # insert right before args[-1] to avoid being overridden
            # by existing flags in args[:-1]
            args[:-1] + [
                # uniform, in case some configs set these and some don't
                '--show-error-codes',
                '--show-column-numbers',
                # don't special-case the last line
                '--no-error-summary',
                f'--config-file={config}',
                args[-1],
            ]
    )
        for config in configs
    ]
    mypy_issues = list(dict.fromkeys(  # remove duplicates, retain order
        item
        # assume stderr is empty
        # https://github.com/python/mypy/issues/1051
        for stdout, _, _ in mypy_results
        for item in stdout.splitlines()
    ))
    for issue in mypy_issues:
        print(issue)
-    # assume all mypy exit codes are nonnegative
+    sys.exit(exit_code)
    # https://github.com/python/mypy/issues/6003
    sys.exit(max(
        [exit_code for _, _, exit_code in mypy_results],
        default=0,
    ))
 if __name__ == '__main__':
--- a/tools/test/test_actions_local_runner.py
+++ b/tools/test/test_actions_local_runner.py
@ -164,15 +164,16 @@ if sys.version_info >= (3, 8):
                await actions_local_runner.run_mypy(self.test_files, True)
-            # Should exclude the aten/ file
+            # Should exclude the aten/ file; also, apparently mypy
            # typechecks files in reverse order
            expected = textwrap.dedent("""
                x mypy (skipped typestub generation)
                caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                torch/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                torch/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
            """).lstrip("\n")  # noqa: B950
            self.assertEqual(expected, f.getvalue())
--- a/tools/test/test_mypy_wrapper.py
+++ b/tools/test/test_mypy_wrapper.py
@ -4,73 +4,154 @@ from tools import mypy_wrapper
 class TestMypyWrapper(unittest.TestCase):
    configs = {
        'foo.ini': {
            'file1.abc',
            'dir2',
            'dir3/file4.xyz',
        },
        'bar/baz.ini': {
            'file1.abc',
            'dir2/dir5/file6.def',
            'dir3/file7.abc',
        },
    }
    trie: mypy_wrapper.Trie = {
        'file1.abc': {None: {'foo.ini', 'bar/baz.ini'}},
        'dir2': {
            None: {'foo.ini'},
            'dir5': {'file6.def': {None: {'bar/baz.ini'}}},
        },
        'dir3': {
            'file4.xyz': {None: {'foo.ini'}},
            'file7.abc': {None: {'bar/baz.ini'}},
        },
    }
    def test_config_files(self) -> None:
-        self.assertEqual(mypy_wrapper.config_files(), {
+        self.assertEqual(mypy_wrapper.config_files().keys(), {
            'mypy.ini',
            'mypy-strict.ini',
        })
-    def test_is_match_can_match_individual_files(self) -> None:
+    def test_split_path(self) -> None:
-        self.assertTrue(mypy_wrapper.is_match(
+        self.assertEqual(mypy_wrapper.split_path('file1.abc'), ['file1.abc'])
-            pattern='test/test_torch.py',
+        self.assertEqual(
-            filename='test/test_torch.py',
+            mypy_wrapper.split_path('dir3/file4.xyz'),
-        ))
+            ['dir3', 'file4.xyz'],
-        self.assertFalse(mypy_wrapper.is_match(
+        )
-            pattern='test/test_torch.py',
+        self.assertEqual(
-            filename='test/test_testing.py',
+            mypy_wrapper.split_path('dir2/dir5/file6.def'),
-        ))
+            ['dir2', 'dir5', 'file6.def'],
        )
-    def test_is_match_dir_matters(self) -> None:
+    def test_make_trie(self) -> None:
-        self.assertFalse(mypy_wrapper.is_match(
+        self.assertEqual(mypy_wrapper.make_trie(self.configs), self.trie)
            pattern='tools/codegen/utils.py',
            filename='torch/nn/modules.py',
        ))
        self.assertTrue(mypy_wrapper.is_match(
            pattern='setup.py',
            filename='setup.py',
        ))
        self.assertFalse(mypy_wrapper.is_match(
            pattern='setup.py',
            filename='foo/setup.py',
        ))
        self.assertTrue(mypy_wrapper.is_match(
            pattern='foo/setup.py',
            filename='foo/setup.py',
        ))
-    def test_is_match_can_match_dirs(self) -> None:
+    def test_lookup(self) -> None:
-        self.assertTrue(mypy_wrapper.is_match(
+        self.assertEqual(
-            pattern='torch',
+            mypy_wrapper.lookup(self.trie, 'file1.abc'),
-            filename='torch/random.py',
+            {'foo.ini', 'bar/baz.ini'},
-        ))
+        )
-        self.assertTrue(mypy_wrapper.is_match(
+        self.assertEqual(
-            pattern='torch',
+            mypy_wrapper.lookup(self.trie, 'dir2/dir5/file6.def'),
-            filename='torch/nn/cpp.py',
+            {'foo.ini', 'bar/baz.ini'},
-        ))
+        )
-        self.assertFalse(mypy_wrapper.is_match(
+        self.assertEqual(
-            pattern='torch',
+            mypy_wrapper.lookup(self.trie, 'dir3/file4.xyz'),
-            filename='tools/fast_nvcc/fast_nvcc.py',
+            {'foo.ini'},
-        ))
+        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'dir3/file7.abc'),
            {'bar/baz.ini'},
        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'file8.xyz'),
            set(),
        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'dir2/dir9/file10.abc'),
            {'foo.ini'},
        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'dir3/file11.abc'),
            set(),
        )
-    def test_is_match_can_match_wildcards(self) -> None:
+        # non-leaves shouldn't ever be passed to lookup in practice, but
-        self.assertTrue(mypy_wrapper.is_match(
+        # still, good to consider/test these cases
-            pattern='tools/autograd/*.py',
+        self.assertEqual(
-            filename='tools/autograd/gen_autograd.py',
+            mypy_wrapper.lookup(self.trie, 'dir2'),
-        ))
+            {'foo.ini'},
-        self.assertFalse(mypy_wrapper.is_match(
+        )
-            pattern='tools/autograd/*.py',
+        self.assertEqual(
-            filename='tools/autograd/deprecated.yaml',
+            mypy_wrapper.lookup(self.trie, 'dir2/dir5'),
-        ))
+            {'foo.ini'},
        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'dir3'),
            set(),
        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'dir2/dir9'),
            {'foo.ini'},
        )
        self.assertEqual(
            mypy_wrapper.lookup(self.trie, 'dir4'),
            set(),
        )
-    def test_is_match_wildcards_dont_expand_or_collapse(self) -> None:
+    def test_make_plan(self) -> None:
-        self.assertFalse(mypy_wrapper.is_match(
+        self.assertEqual(
-            pattern='benchmarks/instruction_counts/*.py',
+            mypy_wrapper.make_plan(configs=self.configs, files=[
-            filename='benchmarks/instruction_counts/core/utils.py',
+                'file8.xyz',
-        ))
+                'dir3/file11.abc',
-        self.assertFalse(mypy_wrapper.is_match(
+            ]),
-            pattern='benchmarks/instruction_counts/*/*.py',
+            {}
-            filename='benchmarks/instruction_counts/main.py',
+        )
-        ))
+        self.assertEqual(
            mypy_wrapper.make_plan(configs=self.configs, files=[
                'file8.xyz',
                'dir2/dir9/file10.abc',
                'dir3/file4.xyz',
                'dir3/file11.abc',
            ]),
            {
                'foo.ini': ['dir2/dir9/file10.abc', 'dir3/file4.xyz'],
            }
        )
        self.assertEqual(
            mypy_wrapper.make_plan(configs=self.configs, files=[
                'file8.xyz',
                'dir3/file11.abc',
                'dir3/file7.abc',
            ]),
            {
                'bar/baz.ini': ['dir3/file7.abc'],
            }
        )
        self.assertEqual(
            mypy_wrapper.make_plan(configs=self.configs, files=[
                'dir2/dir9/file10.abc',
                'dir2/dir5/file6.def',
                'dir3/file7.abc',
                'file1.abc',
                'dir3/file11.abc',
            ]),
            {
                'foo.ini': [
                    'dir2/dir9/file10.abc',
                    'dir2/dir5/file6.def',
                    'file1.abc',
                ],
                'bar/baz.ini': [
                    'dir2/dir5/file6.def',
                    'dir3/file7.abc',
                    'file1.abc',
                ],
            }
        )
 if __name__ == '__main__':