pytorch/tools/linter/adapters/newlines_linter.py
Aaron Orenstein b55b779ad3 Add file size limits to linters and refactor grep_linter (#166202)
- Add 1GB file size limits to grep_linter, newlines_linter, codespell_linter
- Refactor grep_linter
  - process files once instead of per-line
  - Extract allowlist check to separate function
  - Add 512KB limit for computing replacements, 100 match limit per file
  - Detect duplicate arguments
- Fix .lintrunner.toml: RAWCUDADEVICE used --pattern twice
Pull Request resolved: https://github.com/pytorch/pytorch/pull/166202
Approved by: https://github.com/Skylion007
2025-10-25 14:57:19 +00:00

197 lines
5.6 KiB
Python

"""
NEWLINE: Checks files to make sure there are no trailing newlines.
"""
from __future__ import annotations
import argparse
import json
import logging
import os
import sys
from enum import Enum
from typing import NamedTuple
# Integer byte values compared against individual bytes of lines read in
# binary mode (indexing a bytes object yields an int).
NEWLINE = 10 # ASCII "\n"
CARRIAGE_RETURN = 13 # ASCII "\r"
# Value placed in the `code` field of every LintMessage emitted by this linter.
LINTER_CODE = "NEWLINE"
# Files larger than this are reported with a warning and not read.
MAX_FILE_SIZE: int = 1024 * 1024 * 1024 # 1GB in bytes
class LintSeverity(str, Enum):
    """Severity attached to a lint message.

    Members are also ``str`` instances, so they serialize directly as their
    string value when a message is dumped to JSON.
    """

    ERROR = "error"
    WARNING = "warning"
    ADVICE = "advice"
    DISABLED = "disabled"
class LintMessage(NamedTuple):
    """One lint result, emitted as a JSON object via ``_asdict()``."""

    # Path of the file the message is about.
    path: str | None
    # Position of the issue; this linter reports file-level issues and
    # always passes None for both.
    line: int | None
    char: int | None
    # Linter code (LINTER_CODE for every message produced in this file).
    code: str
    severity: LintSeverity
    # Short name of the specific issue, e.g. "DOS newline".
    name: str
    # File contents before and after the suggested fix; None when no
    # automatic fix is offered.
    original: str | None
    replacement: str | None
    # Human-readable explanation of the issue.
    description: str | None
def _lint_message(
    filename: str,
    severity: LintSeverity,
    name: str,
    description: str,
    original: str | None = None,
    replacement: str | None = None,
) -> LintMessage:
    """Build a file-level LintMessage (no line/char position) for this linter."""
    return LintMessage(
        path=filename,
        line=None,
        char=None,
        code=LINTER_CODE,
        severity=severity,
        name=name,
        original=original,
        replacement=replacement,
        description=description,
    )


def _decoding_failure(filename: str, err: UnicodeDecodeError) -> LintMessage:
    """Build the error message reported when a file is not valid UTF-8."""
    return _lint_message(
        filename,
        LintSeverity.ERROR,
        "Decoding failure",
        f"utf-8 decoding failed due to {err.__class__.__name__}:\n{err}",
    )


def check_file(filename: str) -> LintMessage | None:
    """Lint a single file for newline problems.

    Checks are applied in this order, and the first one that fires wins:

    1. The file cannot be stat'ed or read -> "file-access-error" (error).
    2. The file is larger than MAX_FILE_SIZE -> "file-too-large" (warning);
       the file is not read.
    3. The file is empty -> no message.
    4. The file consists of exactly one byte -> "Trailing newline" error
       without a suggested replacement.
    5. The last line is a bare newline (the file ends with a blank line) ->
       "Trailing newline" error whose replacement strips the extra newlines.
    6. Any line ends with CRLF -> "DOS newline" error whose replacement uses
       LF line endings.

    Args:
        filename: Path of the file to check.

    Returns:
        A LintMessage describing the first problem found, or None if the
        file is clean. Failures to access or decode the file are reported as
        LintMessages rather than raised.
    """
    logging.debug("Checking file %s", filename)

    try:
        file_size = os.path.getsize(filename)
    except OSError as err:
        return _lint_message(
            filename,
            LintSeverity.ERROR,
            "file-access-error",
            f"Failed to get file size: {err}",
        )

    if file_size > MAX_FILE_SIZE:
        return _lint_message(
            filename,
            LintSeverity.WARNING,
            "file-too-large",
            f"File size ({file_size} bytes) exceeds {MAX_FILE_SIZE} bytes limit, skipping",
        )

    # The file can disappear or be unreadable even though the size lookup
    # succeeded; report that as a lint message instead of crashing the run.
    try:
        with open(filename, "rb") as f:
            lines = f.readlines()
    except OSError as err:
        return _lint_message(
            filename,
            LintSeverity.ERROR,
            "file-access-error",
            f"Failed to read file: {err}",
        )

    if not lines:
        # File is empty, just leave it alone.
        return None

    if len(lines) == 1 and len(lines[0]) == 1:
        # file is wrong whether or not the only byte is a newline
        return _lint_message(
            filename,
            LintSeverity.ERROR,
            "Trailing newline",
            "Trailing newline found. Run `lintrunner --take NEWLINE -a` to apply changes.",
        )

    if lines[-1] == b"\n":
        try:
            original = b"".join(lines).decode("utf-8")
        except UnicodeDecodeError as err:
            return _decoding_failure(filename, err)
        stripped = original.rstrip("\n")
        # A file made only of newlines is reduced to an empty file: a lone
        # "\n" would itself be rejected by the single-byte check above, so
        # suggesting it would make the auto-fix never converge.
        replacement = stripped + "\n" if stripped else ""
        return _lint_message(
            filename,
            LintSeverity.ERROR,
            "Trailing newline",
            "Trailing newline found. Run `lintrunner --take NEWLINE -a` to apply changes.",
            original=original,
            replacement=replacement,
        )

    if not any(line.endswith(b"\r\n") for line in lines):
        return None

    # Lines were split on b"\n" (binary readlines), so a CRLF ending can only
    # appear as the final two bytes of a line.
    fixed_lines = [
        line[:-2] + b"\n" if line.endswith(b"\r\n") else line for line in lines
    ]
    try:
        original = b"".join(lines).decode("utf-8")
        replacement = b"".join(fixed_lines).decode("utf-8")
    except UnicodeDecodeError as err:
        return _decoding_failure(filename, err)
    return _lint_message(
        filename,
        LintSeverity.ERROR,
        "DOS newline",
        "DOS newline found. Run `lintrunner --take NEWLINE -a` to apply changes.",
        original=original,
        replacement=replacement,
    )
if __name__ == "__main__":
    # Command-line entry point: lint every given path and print one JSON
    # object per lint message on stdout. Logging goes to stderr so it does
    # not mix with the JSON output.
    parser = argparse.ArgumentParser(
        description="newlines linter",
        # Allows passing "@file" to read the argument list from a file.
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="verbose logging",
    )
    parser.add_argument(
        "filenames",
        nargs="+",
        help="paths to lint",
    )
    args = parser.parse_args()

    # --verbose logs everything; otherwise per-file debug logging is kept
    # only for runs over fewer than 1000 files.
    if args.verbose:
        log_level = logging.NOTSET
    elif len(args.filenames) < 1000:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        format="<%(threadName)s:%(levelname)s> %(message)s",
        level=log_level,
        stream=sys.stderr,
    )

    # Check every file first, then emit all messages.
    lint_messages = []
    for filename in args.filenames:
        lint_message = check_file(filename)
        if lint_message is not None:
            lint_messages.append(lint_message)

    for lint_message in lint_messages:
        print(json.dumps(lint_message._asdict()), flush=True)