From 580f1183b4deb047e47887dca94b60f6f60ed4da Mon Sep 17 00:00:00 2001 From: Zesheng Zong Date: Tue, 25 Feb 2025 18:27:30 +0000 Subject: [PATCH] Enable ruff rule S324 (#147665) Fixes #147627 - Add `S324` in `pyproject.toml` - Run the check and clean up the warnings ```bash lintrunner --take RUFF --all-files ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/147665 Approved by: https://github.com/Skylion007 Co-authored-by: Aaron Gokaslan --- .github/scripts/pytest_caching_utils.py | 2 +- benchmarks/instruction_counts/applications/ci.py | 2 +- pyproject.toml | 1 + tools/stats/upload_dynamo_perf_stats.py | 4 +++- torch/_logging/_internal.py | 2 +- torch/distributed/distributed_c10d.py | 2 +- torch/fx/passes/graph_drawer.py | 7 ++++++- torch/utils/_config_module.py | 4 +++- torch/utils/_content_store.py | 2 +- torchgen/utils.py | 2 +- 10 files changed, 19 insertions(+), 9 deletions(-) diff --git a/.github/scripts/pytest_caching_utils.py b/.github/scripts/pytest_caching_utils.py index 0cfb4e823f6..5101dd2a832 100644 --- a/.github/scripts/pytest_caching_utils.py +++ b/.github/scripts/pytest_caching_utils.py @@ -33,7 +33,7 @@ class PRIdentifier(str): __slots__ = () def __new__(cls, value: str) -> "PRIdentifier": - md5 = hashlib.md5(value.encode("utf-8")).hexdigest() + md5 = hashlib.md5(value.encode("utf-8"), usedforsecurity=False).hexdigest() return super().__new__(cls, md5) diff --git a/benchmarks/instruction_counts/applications/ci.py b/benchmarks/instruction_counts/applications/ci.py index 86ba3a93629..4c9517b0f89 100644 --- a/benchmarks/instruction_counts/applications/ci.py +++ b/benchmarks/instruction_counts/applications/ci.py @@ -44,7 +44,7 @@ def main(argv: list[str]) -> None: ) keys = tuple({str(work_order): None for work_order in work_orders}.keys()) - md5 = hashlib.md5() + md5 = hashlib.md5(usedforsecurity=False) for key in keys: md5.update(key.encode("utf-8")) diff --git a/pyproject.toml b/pyproject.toml index f6c5bffedd4..e84d980ff30 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -149,6 +149,7 @@ select = [ "RUF024", # from keys mutable "RUF026", # default factory kwarg "RUF030", # No print statement in assert + "S324", # for hashlib FIPS compliance "SLOT", "TCH", "TRY002", # ban vanilla raise (todo fix NOQAs) diff --git a/tools/stats/upload_dynamo_perf_stats.py b/tools/stats/upload_dynamo_perf_stats.py index 0cfd7856262..49af6151a10 100644 --- a/tools/stats/upload_dynamo_perf_stats.py +++ b/tools/stats/upload_dynamo_perf_stats.py @@ -95,7 +95,9 @@ def generate_partition_key(repo: str, doc: dict[str, Any]) -> str: test_name = doc["test_name"] filename = doc["filename"] - hash_content = hashlib.md5(json.dumps(doc).encode("utf-8")).hexdigest() + hash_content = hashlib.md5( + json.dumps(doc).encode("utf-8"), usedforsecurity=False + ).hexdigest() return f"{repo}/{workflow_id}/{job_id}/{test_name}/{filename}/{hash_content}" diff --git a/torch/_logging/_internal.py b/torch/_logging/_internal.py index b09ea79653e..ae74b121302 100644 --- a/torch/_logging/_internal.py +++ b/torch/_logging/_internal.py @@ -1275,7 +1275,7 @@ def trace_structured( # force newlines so we are unlikely to overflow line limit payload = json.dumps(payload, default=json_default, indent=0) - h = hashlib.md5() + h = hashlib.md5(usedforsecurity=False) h.update(payload.encode("utf-8")) record["has_payload"] = h.hexdigest() trace_log.debug( diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 73469181a12..26ec5fecb27 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -4736,7 +4736,7 @@ def _hash_ranks_to_str(ranks: list[int]) -> str: rank_join: str = "_".join(map(str, ranks)) # In case there is already a PG with the same rank composition unique_str = "_".join([rank_join, str(len(_world.pg_names))]) - return hashlib.sha1(bytes(unique_str, "utf-8")).hexdigest() + return hashlib.sha1(bytes(unique_str, "utf-8"), usedforsecurity=False).hexdigest() # Takes 
a list of ranks and computes an integer color diff --git a/torch/fx/passes/graph_drawer.py b/torch/fx/passes/graph_drawer.py index ab696837bb1..275b0d5f6f9 100644 --- a/torch/fx/passes/graph_drawer.py +++ b/torch/fx/passes/graph_drawer.py @@ -165,7 +165,12 @@ if HAS_PYDOT: else: # Use a random color for each node; based on its name so it's stable. target_name = node._pretty_print_target(node.target) - target_hash = int(hashlib.md5(target_name.encode()).hexdigest()[:8], 16) + target_hash = int( + hashlib.md5( + target_name.encode(), usedforsecurity=False + ).hexdigest()[:8], + 16, + ) template["fillcolor"] = _HASH_COLOR_MAP[ target_hash % len(_HASH_COLOR_MAP) ] diff --git a/torch/utils/_config_module.py b/torch/utils/_config_module.py index bd74462747a..48dd1425b37 100644 --- a/torch/utils/_config_module.py +++ b/torch/utils/_config_module.py @@ -584,7 +584,9 @@ class ConfigModule(ModuleType): if self._is_dirty or self._hash_digest is None: dict_to_hash = self._get_dict(ignored_keys=list(self._compile_ignored_keys)) string_to_hash = repr(sorted(dict_to_hash.items())) - self._hash_digest = hashlib.md5(string_to_hash.encode("utf-8")).digest() + self._hash_digest = hashlib.md5( + string_to_hash.encode("utf-8"), usedforsecurity=False + ).digest() self._is_dirty = False return self._hash_digest diff --git a/torch/utils/_content_store.py b/torch/utils/_content_store.py index 45ac381e0f7..80146dddd2d 100644 --- a/torch/utils/_content_store.py +++ b/torch/utils/_content_store.py @@ -105,7 +105,7 @@ def hash_storage(storage: torch.UntypedStorage, *, stable_hash: bool = False) -> buf = (ctypes.c_byte * cpu_storage.nbytes()).from_address( cpu_storage.data_ptr() ) - sha1 = hashlib.sha1() + sha1 = hashlib.sha1(usedforsecurity=False) sha1.update(buf) return sha1.hexdigest() diff --git a/torchgen/utils.py b/torchgen/utils.py index 9647c05bb1e..f735ef49fcd 100644 --- a/torchgen/utils.py +++ b/torchgen/utils.py @@ -110,7 +110,7 @@ def _read_template(template_fn: str) -> 
CodeTemplate: # String hash that's stable across different executions, unlike builtin hash def string_stable_hash(s: str) -> int: - sha1 = hashlib.sha1(s.encode("latin1")).digest() + sha1 = hashlib.sha1(s.encode("latin1"), usedforsecurity=False).digest() return int.from_bytes(sha1, byteorder="little")