Make mypy_wrapper.py accept multiple filenames (#57998)

Summary:
A followup to https://github.com/pytorch/pytorch/issues/57752.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/57998

Test Plan:
```
mypy --config=mypy-strict.ini
python tools/test/test_mypy_wrapper.py
python tools/test/test_actions_local_runner.py -k mypy
```

Reviewed By: driazati

Differential Revision: D28338531

Pulled By: samestep

fbshipit-source-id: ae31e3fa4a2b8060c200f9a13f768beaf2f55694
2025-12-06 12:20:52 +01:00 · 2021-05-11 15:50:51 -07:00 · 2021-05-11 15:50:51 -07:00 · c36055bb42
commit c36055bb42
parent f9c8b7f1a8
5 changed files with 301 additions and 145 deletions
--- a/mypy.ini
+++ b/mypy.ini
@ -8,6 +8,7 @@ cache_dir = .mypy_cache/normal
 warn_unused_configs = True
 warn_redundant_casts = True
 show_error_codes = True
+show_column_numbers = True
 check_untyped_defs = True
 follow_imports = silent

--- a/tools/actions_local_runner.py
+++ b/tools/actions_local_runner.py
@ -173,28 +173,18 @@ async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
    if files is not None:
        # Running quick lint, use mypy-wrapper instead so it checks that the files
        # actually should be linted
-        stdout = ""
-        stderr = ""
-        passed = True

-        # Pass each file to the mypy_wrapper script
-        # TODO: Fix mypy wrapper to mock mypy's args and take in N files instead
-        # of just 1 at a time
-        for f in files:
-            f = os.path.join(REPO_ROOT, f)
-            f_passed, f_stdout, f_stderr = await shell_cmd(
-                [sys.executable, "tools/mypy_wrapper.py", f],
-                env=env,
-            )
-            if not f_passed:
-                passed = False
+        passed, stdout, stderr = await shell_cmd(
+            [sys.executable, "tools/mypy_wrapper.py"] + [
+                os.path.join(REPO_ROOT, f) for f in files
+            ],
+            env=env,
+        )

-            if f_stdout != "":
-                stdout += f_stdout + "\n"
-            if f_stderr != "":
-                stderr += f_stderr + "\n"
-
-        print_results("mypy (skipped typestub generation)", passed, [stdout, stderr])
+        print_results("mypy (skipped typestub generation)", passed, [
+            stdout + "\n",
+            stderr + "\n",
+        ])
        return passed

    # Not running quicklint, so use lint.yml
--- a/tools/mypy_wrapper.py
+++ b/tools/mypy_wrapper.py
@ -18,11 +18,11 @@ See also these wiki pages:
 - https://github.com/pytorch/pytorch/wiki/Lint-as-you-type
 """

-import re
 import sys
+from collections import defaultdict
 from configparser import ConfigParser
-from pathlib import Path, PurePath
-from typing import List, Set
+from pathlib import Path, PurePath, PurePosixPath
+from typing import Any, Dict, List, Optional, Set, Tuple

 import mypy.api
 # not part of the public API, but this is the easiest way to ensure that
@ -30,35 +30,144 @@ import mypy.api
 import mypy.config_parser


-def config_files() -> Set[str]:
+def read_config(config_path: Path) -> Set[str]:
    """
-    Return a set of the names of all the PyTorch mypy config files.
-    """
-    return {str(p) for p in Path().glob('mypy*.ini')}
-
-
-def is_match(*, pattern: str, filename: str) -> bool:
-    """
-    Return True iff the filename matches the (mypy ini) glob pattern.
-    """
-    for path in mypy.config_parser.split_and_match_files(pattern):
-        path = PurePath(path).as_posix()
-        if filename == path or filename.startswith(f'{path}/'):
-            return True
-    return False
-
-
-def in_files(*, ini: str, py: str) -> bool:
-    """
-    Return True iff the py file is included in the ini file's "files".
+    Return the set of `files` in the `mypy` ini file at config_path.
    """
    config = ConfigParser()
+    config.read(config_path)
+    # hopefully on Windows this gives posix paths
+    return set(mypy.config_parser.split_and_match_files(
+        config['mypy']['files'],
+    ))
+
+
+# see tools/test/test_mypy_wrapper.py for examples of many of the
+# following functions
+
+
+def config_files() -> Dict[str, Set[str]]:
+    """
+    Return a dict from all our `mypy` ini filenames to their `files`.
+    """
+    return {str(ini): read_config(ini) for ini in Path().glob('mypy*.ini')}
+
+
+def split_path(path: str) -> List[str]:
+    """
+    Split a relative (not absolute) POSIX path into its segments.
+    """
+    pure = PurePosixPath(path)
+    return [str(p.name) for p in list(reversed(pure.parents))[1:] + [pure]]
+
+
+# mypy doesn't support recursive types yet
+# https://github.com/python/mypy/issues/731
+
+# but if it did, the `Any` here would be `Union[Set[str], 'Trie']`,
+# although that is not completely accurate: specifically, every `None`
+# key must map to a `Set[str]`, and every `str` key must map to a `Trie`
+Trie = Dict[Optional[str], Any]
+
+
+def make_trie(configs: Dict[str, Set[str]]) -> Trie:
+    """
+    Return a trie from path prefixes to their `mypy` configs.
+
+    Specifically, each layer of the trie represents a segment of a POSIX
+    path relative to the root of this repo. If you follow a path down
+    the trie and reach a `None` key, that `None` maps to the (nonempty)
+    set of keys in `configs` which explicitly include that path.
+    """
+    trie: Trie = {}
+    for ini, files in configs.items():
+        for f in files:
+            inner = trie
+            for segment in split_path(f):
+                inner = inner.setdefault(segment, {})
+            inner.setdefault(None, set()).add(ini)
+    return trie
+
+
+def lookup(trie: Trie, filename: str) -> Set[str]:
+    """
+    Return the configs in `trie` that include a prefix of `filename`.
+
+    A path is included by a config if any of its ancestors are included
+    by the wildcard-expanded version of that config's `files`. Thus,
+    this function follows `filename`'s path down the `trie` and
+    accumulates all the configs it finds along the way.
+    """
+    configs = set()
+    inner = trie
+    for segment in split_path(filename):
+        inner = inner.get(segment, {})
+        configs |= inner.get(None, set())
+    return configs
+
+
+def make_plan(
+    *,
+    configs: Dict[str, Set[str]],
+    files: List[str]
+) -> Dict[str, List[str]]:
+    """
+    Return a dict from config names to the files to run them with.
+
+    The keys of the returned dict are a subset of the keys of `configs`.
+    The list of files in each value of returned dict should contain a
+    nonempty subset of the given `files`, in the same order as `files`.
+    """
+    trie = make_trie(configs)
+    plan = defaultdict(list)
+    for filename in files:
+        for config in lookup(trie, filename):
+            plan[config].append(filename)
+    return plan
+
+
+def run(*, args: List[str], files: List[str]) -> Tuple[int, List[str]]:
+    """
+    Return the exit code and list of output lines from running `mypy`.
+
+    The given `args` are passed verbatim to `mypy`. The `files` (each of
+    which must be an absolute path) are converted to relative paths
+    (that is, relative to the root of this repo) and then classified
+    according to which ones need to be run with each `mypy` config.
+    Thus, `mypy` may be run zero, one, or multiple times, but it will be
+    run at most once for each `mypy` config used by this repo.
+    """
    repo_root = Path.cwd()
-    filename = PurePath(py).relative_to(repo_root).as_posix()
-    config.read(repo_root / ini)
-    return any(
-        is_match(pattern=pattern, filename=filename)
-        for pattern in re.split(r',\s*', config['mypy']['files'].strip())
+    plan = make_plan(configs=config_files(), files=[
+        PurePath(f).relative_to(repo_root).as_posix() for f in files
+    ])
+    mypy_results = [
+        mypy.api.run(
+            # insert custom flags after args to avoid being overridden
+            # by existing flags in args
+            args + [
+                # don't special-case the last line
+                '--no-error-summary',
+                f'--config-file={config}',
+            ] + filtered
+        )
+        # by construction, filtered must be nonempty
+        for config, filtered in plan.items()
+    ]
+    return (
+        # assume all mypy exit codes are nonnegative
+        # https://github.com/python/mypy/issues/6003
+        max(
+            [exit_code for _, _, exit_code in mypy_results],
+            default=0,
+        ),
+        list(dict.fromkeys(  # remove duplicates, retain order
+            item
+            # assume stderr is empty
+            # https://github.com/python/mypy/issues/1051
+            for stdout, _, _ in mypy_results
+            for item in stdout.splitlines()
+        )),
    )


@ -70,7 +179,7 @@ def main(args: List[str]) -> None:

    - the cwd is set to the root of this cloned repo
    - args is a valid list of CLI arguments that could be passed to mypy
-    - last element of args is an absolute path to a file to typecheck
+    - some of args are absolute paths to files to typecheck
    - all the other args are config flags for mypy, rather than files

    These assumptions hold, for instance, when mypy is run automatically
@ -89,43 +198,17 @@ def main(args: List[str]) -> None:
        }

    More generally, this should work for any editor sets the cwd to the
-    repo root, runs mypy on one file at a time via its absolute path,
+    repo root, runs mypy on individual files via their absolute paths,
    and allows you to set the path to the mypy executable.
    """
-    if not args:
-        sys.exit('The PyTorch mypy wrapper must be passed exactly one file.')
-    configs = [f for f in config_files() if in_files(ini=f, py=args[-1])]
-    mypy_results = [
-        mypy.api.run(
-            # insert right before args[-1] to avoid being overridden
-            # by existing flags in args[:-1]
-            args[:-1] + [
-                # uniform, in case some configs set these and some don't
-                '--show-error-codes',
-                '--show-column-numbers',
-                # don't special-case the last line
-                '--no-error-summary',
-                f'--config-file={config}',
-                args[-1],
-            ]
-        )
-        for config in configs
-    ]
-    mypy_issues = list(dict.fromkeys(  # remove duplicates, retain order
-        item
-        # assume stderr is empty
-        # https://github.com/python/mypy/issues/1051
-        for stdout, _, _ in mypy_results
-        for item in stdout.splitlines()
-    ))
+    repo_root = str(Path.cwd())
+    exit_code, mypy_issues = run(
+        args=[arg for arg in args if not arg.startswith(repo_root)],
+        files=[arg for arg in args if arg.startswith(repo_root)],
+    )
    for issue in mypy_issues:
        print(issue)
-    # assume all mypy exit codes are nonnegative
-    # https://github.com/python/mypy/issues/6003
-    sys.exit(max(
-        [exit_code for _, _, exit_code in mypy_results],
-        default=0,
-    ))
+    sys.exit(exit_code)


 if __name__ == '__main__':
--- a/tools/test/test_actions_local_runner.py
+++ b/tools/test/test_actions_local_runner.py
@ -164,15 +164,16 @@ if sys.version_info >= (3, 8):
                await actions_local_runner.run_mypy(self.test_files, True)


-            # Should exclude the aten/ file
+            # Should exclude the aten/ file; also, apparently mypy
+            # typechecks files in reverse order
            expected = textwrap.dedent("""
                x mypy (skipped typestub generation)
-                caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
-                caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
-                torch/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
-                torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
+                torch/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
+                torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
+                caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
+                caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
            """).lstrip("\n")  # noqa: B950
            self.assertEqual(expected, f.getvalue())

--- a/tools/test/test_mypy_wrapper.py
+++ b/tools/test/test_mypy_wrapper.py
@ -4,73 +4,154 @@ from tools import mypy_wrapper


 class TestMypyWrapper(unittest.TestCase):
+    configs = {
+        'foo.ini': {
+            'file1.abc',
+            'dir2',
+            'dir3/file4.xyz',
+        },
+        'bar/baz.ini': {
+            'file1.abc',
+            'dir2/dir5/file6.def',
+            'dir3/file7.abc',
+        },
+    }
+
+    trie: mypy_wrapper.Trie = {
+        'file1.abc': {None: {'foo.ini', 'bar/baz.ini'}},
+        'dir2': {
+            None: {'foo.ini'},
+            'dir5': {'file6.def': {None: {'bar/baz.ini'}}},
+        },
+        'dir3': {
+            'file4.xyz': {None: {'foo.ini'}},
+            'file7.abc': {None: {'bar/baz.ini'}},
+        },
+    }
+
    def test_config_files(self) -> None:
-        self.assertEqual(mypy_wrapper.config_files(), {
+        self.assertEqual(mypy_wrapper.config_files().keys(), {
            'mypy.ini',
            'mypy-strict.ini',
        })

-    def test_is_match_can_match_individual_files(self) -> None:
-        self.assertTrue(mypy_wrapper.is_match(
-            pattern='test/test_torch.py',
-            filename='test/test_torch.py',
-        ))
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='test/test_torch.py',
-            filename='test/test_testing.py',
-        ))
+    def test_split_path(self) -> None:
+        self.assertEqual(mypy_wrapper.split_path('file1.abc'), ['file1.abc'])
+        self.assertEqual(
+            mypy_wrapper.split_path('dir3/file4.xyz'),
+            ['dir3', 'file4.xyz'],
+        )
+        self.assertEqual(
+            mypy_wrapper.split_path('dir2/dir5/file6.def'),
+            ['dir2', 'dir5', 'file6.def'],
+        )

-    def test_is_match_dir_matters(self) -> None:
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='tools/codegen/utils.py',
-            filename='torch/nn/modules.py',
-        ))
-        self.assertTrue(mypy_wrapper.is_match(
-            pattern='setup.py',
-            filename='setup.py',
-        ))
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='setup.py',
-            filename='foo/setup.py',
-        ))
-        self.assertTrue(mypy_wrapper.is_match(
-            pattern='foo/setup.py',
-            filename='foo/setup.py',
-        ))
+    def test_make_trie(self) -> None:
+        self.assertEqual(mypy_wrapper.make_trie(self.configs), self.trie)

-    def test_is_match_can_match_dirs(self) -> None:
-        self.assertTrue(mypy_wrapper.is_match(
-            pattern='torch',
-            filename='torch/random.py',
-        ))
-        self.assertTrue(mypy_wrapper.is_match(
-            pattern='torch',
-            filename='torch/nn/cpp.py',
-        ))
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='torch',
-            filename='tools/fast_nvcc/fast_nvcc.py',
-        ))
+    def test_lookup(self) -> None:
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'file1.abc'),
+            {'foo.ini', 'bar/baz.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir2/dir5/file6.def'),
+            {'foo.ini', 'bar/baz.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir3/file4.xyz'),
+            {'foo.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir3/file7.abc'),
+            {'bar/baz.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'file8.xyz'),
+            set(),
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir2/dir9/file10.abc'),
+            {'foo.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir3/file11.abc'),
+            set(),
+        )

-    def test_is_match_can_match_wildcards(self) -> None:
-        self.assertTrue(mypy_wrapper.is_match(
-            pattern='tools/autograd/*.py',
-            filename='tools/autograd/gen_autograd.py',
-        ))
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='tools/autograd/*.py',
-            filename='tools/autograd/deprecated.yaml',
-        ))
+        # non-leaves shouldn't ever be passed to lookup in practice, but
+        # still, good to consider/test these cases
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir2'),
+            {'foo.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir2/dir5'),
+            {'foo.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir3'),
+            set(),
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir2/dir9'),
+            {'foo.ini'},
+        )
+        self.assertEqual(
+            mypy_wrapper.lookup(self.trie, 'dir4'),
+            set(),
+        )

-    def test_is_match_wildcards_dont_expand_or_collapse(self) -> None:
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='benchmarks/instruction_counts/*.py',
-            filename='benchmarks/instruction_counts/core/utils.py',
-        ))
-        self.assertFalse(mypy_wrapper.is_match(
-            pattern='benchmarks/instruction_counts/*/*.py',
-            filename='benchmarks/instruction_counts/main.py',
-        ))
+    def test_make_plan(self) -> None:
+        self.assertEqual(
+            mypy_wrapper.make_plan(configs=self.configs, files=[
+                'file8.xyz',
+                'dir3/file11.abc',
+            ]),
+            {}
+        )
+        self.assertEqual(
+            mypy_wrapper.make_plan(configs=self.configs, files=[
+                'file8.xyz',
+                'dir2/dir9/file10.abc',
+                'dir3/file4.xyz',
+                'dir3/file11.abc',
+            ]),
+            {
+                'foo.ini': ['dir2/dir9/file10.abc', 'dir3/file4.xyz'],
+            }
+        )
+        self.assertEqual(
+            mypy_wrapper.make_plan(configs=self.configs, files=[
+                'file8.xyz',
+                'dir3/file11.abc',
+                'dir3/file7.abc',
+            ]),
+            {
+                'bar/baz.ini': ['dir3/file7.abc'],
+            }
+        )
+        self.assertEqual(
+            mypy_wrapper.make_plan(configs=self.configs, files=[
+                'dir2/dir9/file10.abc',
+                'dir2/dir5/file6.def',
+                'dir3/file7.abc',
+                'file1.abc',
+                'dir3/file11.abc',
+            ]),
+            {
+                'foo.ini': [
+                    'dir2/dir9/file10.abc',
+                    'dir2/dir5/file6.def',
+                    'file1.abc',
+                ],
+                'bar/baz.ini': [
+                    'dir2/dir5/file6.def',
+                    'dir3/file7.abc',
+                    'file1.abc',
+                ],
+            }
+        )


 if __name__ == '__main__':