mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
upload test stats: remove nan/inf when uploading (#136877)
`json.dumps(float("inf"))` returns `Infinity`, which is technically invalid JSON.
This is fine if you read the output back with `json.load`, but ClickHouse cannot handle it.
Solution here: cast inf and NaN to strings (which ClickHouse is able to cast back to float).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/136877
Approved by: https://github.com/huydhn
This commit is contained in:
parent
0788d016d6
commit
6baee60e3c
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
import gzip
|
||||
import io
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
|
|
@ -199,6 +200,23 @@ def read_from_s3(
|
|||
return [json.loads(result) for result in results if result]
|
||||
|
||||
|
||||
def remove_nan_inf(old: Any) -> Any:
    """Return a copy of ``old`` with non-finite floats replaced by strings.

    ``json.dumps`` renders NaN, inf and -inf as ``NaN``, ``Infinity`` and
    ``-Infinity``, which is not valid JSON. Every such float is replaced by
    ``str(value)`` (``"nan"``, ``"inf"``, ``"-inf"``).

    Lists, tuples and dicts are rebuilt recursively (dict keys are converted
    as well as values); any other object is returned unchanged.
    """
    if isinstance(old, float) and (math.isinf(old) or math.isnan(old)):
        return str(old)
    if isinstance(old, list):
        return [remove_nan_inf(v) for v in old]
    if isinstance(old, dict):
        return {remove_nan_inf(k): remove_nan_inf(v) for k, v in old.items()}
    if isinstance(old, tuple):
        return tuple(remove_nan_inf(v) for v in old)
    return old
|
||||
|
||||
|
||||
def upload_workflow_stats_to_s3(
|
||||
workflow_run_id: int,
|
||||
workflow_run_attempt: int,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from tools.stats.test_dashboard import upload_additional_info
|
|||
from tools.stats.upload_stats_lib import (
|
||||
download_s3_artifacts,
|
||||
get_job_id,
|
||||
remove_nan_inf,
|
||||
unzip,
|
||||
upload_workflow_stats_to_s3,
|
||||
)
|
||||
|
|
@ -266,7 +267,7 @@ if __name__ == "__main__":
|
|||
args.workflow_run_id,
|
||||
args.workflow_run_attempt,
|
||||
"test_run_summary",
|
||||
test_case_summary,
|
||||
remove_nan_inf(test_case_summary),
|
||||
)
|
||||
|
||||
# Separate out the failed test cases.
|
||||
|
|
@ -281,13 +282,16 @@ if __name__ == "__main__":
|
|||
args.workflow_run_id,
|
||||
args.workflow_run_attempt,
|
||||
"failed_test_runs",
|
||||
failed_tests_cases,
|
||||
remove_nan_inf(failed_tests_cases),
|
||||
)
|
||||
|
||||
if args.head_branch == "main" and args.head_repository == "pytorch/pytorch":
|
||||
# For jobs on main branch, upload everything.
|
||||
upload_workflow_stats_to_s3(
|
||||
args.workflow_run_id, args.workflow_run_attempt, "test_run", test_cases
|
||||
args.workflow_run_id,
|
||||
args.workflow_run_attempt,
|
||||
"test_run",
|
||||
remove_nan_inf(test_cases),
|
||||
)
|
||||
|
||||
upload_additional_info(args.workflow_run_id, args.workflow_run_attempt, test_cases)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
|||
|
||||
import decimal
|
||||
import inspect
|
||||
import json
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
|
@ -13,7 +14,7 @@ REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
|||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
from tools.stats.upload_metrics import add_global_metric, emit_metric
|
||||
from tools.stats.upload_stats_lib import BATCH_SIZE, upload_to_rockset
|
||||
from tools.stats.upload_stats_lib import BATCH_SIZE, remove_nan_inf, upload_to_rockset
|
||||
|
||||
|
||||
sys.path.remove(str(REPO_ROOT))
|
||||
|
|
@ -335,6 +336,29 @@ class TestUploadStats(unittest.TestCase):
|
|||
expected_number_of_requests,
|
||||
)
|
||||
|
||||
def test_remove_nan_inf(self) -> None:
    """Check that remove_nan_inf makes non-finite floats JSON-safe.

    Each case is a tuple of (value, expected ``json.dumps`` output after
    ``remove_nan_inf``, expected ``json.dumps`` output of the raw value).
    The raw output is asserted too, to document the invalid JSON that
    ``json.dumps`` would otherwise emit.
    """
    checks = [
        (float("inf"), '"inf"', "Infinity"),
        (float("-inf"), '"-inf"', "-Infinity"),
        (float("nan"), '"nan"', "NaN"),
        ({1: float("inf")}, '{"1": "inf"}', '{"1": Infinity}'),
        ([float("nan")], '["nan"]', "[NaN]"),
        # json.dumps serializes tuples as JSON arrays.
        ((float("nan"),), '["nan"]', "[NaN]"),
        ({1: [float("nan")]}, '{"1": ["nan"]}', '{"1": [NaN]}'),
    ]

    for value, clean, unclean in checks:
        clean_output = json.dumps(remove_nan_inf(value))
        unclean_output = json.dumps(value)
        self.assertEqual(
            clean_output,
            clean,
            f"Expected {clean} when input is {unclean}, got {clean_output}",
        )
        self.assertEqual(
            unclean_output,
            unclean,
            f"Expected raw json.dumps output {unclean}, got {unclean_output}",
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user