upload test stats: remove nan/inf when uploading (#136877)

`json.dumps(float("inf"))` returns `Infinity` (and `json.dumps(float("nan"))` returns `NaN`), neither of which is valid JSON

This is fine if you read the data back with Python's `json.load`, which accepts these tokens, but ClickHouse cannot parse them

Solution here: cast inf and nan to string (which ClickHouse is able to cast back to float)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/136877
Approved by: https://github.com/huydhn
This commit is contained in:
Catherine Lee 2024-10-01 21:47:46 +00:00 committed by PyTorch MergeBot
parent 0788d016d6
commit 6baee60e3c
3 changed files with 50 additions and 4 deletions

View File

@ -3,6 +3,7 @@ from __future__ import annotations
import gzip
import io
import json
import math
import os
import time
import zipfile
@ -199,6 +200,23 @@ def read_from_s3(
return [json.loads(result) for result in results if result]
def remove_nan_inf(old: Any) -> Any:
    """Return a copy of ``old`` with non-finite floats replaced by strings.

    ``json.dumps`` serializes ``float("nan")``, ``float("inf")`` and
    ``float("-inf")`` as ``NaN``, ``Infinity`` and ``-Infinity``, which are
    not valid JSON. Each such float is replaced by ``str(value)``, i.e.
    ``"nan"``, ``"inf"`` or ``"-inf"``.

    Lists, tuples and dicts (both keys and values) are rebuilt recursively;
    every other value is returned unchanged. The input is never mutated.

    Args:
        old: Arbitrary, possibly nested, data that is about to be serialized.

    Returns:
        The same structure with every NaN/inf/-inf float cast to a string.
    """
    if isinstance(old, float):
        # isfinite() is False exactly for NaN, inf and -inf.
        return old if math.isfinite(old) else str(old)
    if isinstance(old, list):
        return [remove_nan_inf(item) for item in old]
    if isinstance(old, dict):
        # Keys are sanitized too so that non-finite float keys become strings.
        return {
            remove_nan_inf(key): remove_nan_inf(value) for key, value in old.items()
        }
    if isinstance(old, tuple):
        return tuple(remove_nan_inf(item) for item in old)
    return old
def upload_workflow_stats_to_s3(
workflow_run_id: int,
workflow_run_attempt: int,

View File

@ -13,6 +13,7 @@ from tools.stats.test_dashboard import upload_additional_info
from tools.stats.upload_stats_lib import (
download_s3_artifacts,
get_job_id,
remove_nan_inf,
unzip,
upload_workflow_stats_to_s3,
)
@ -266,7 +267,7 @@ if __name__ == "__main__":
args.workflow_run_id,
args.workflow_run_attempt,
"test_run_summary",
test_case_summary,
remove_nan_inf(test_case_summary),
)
# Separate out the failed test cases.
@ -281,13 +282,16 @@ if __name__ == "__main__":
args.workflow_run_id,
args.workflow_run_attempt,
"failed_test_runs",
failed_tests_cases,
remove_nan_inf(failed_tests_cases),
)
if args.head_branch == "main" and args.head_repository == "pytorch/pytorch":
# For jobs on main branch, upload everything.
upload_workflow_stats_to_s3(
args.workflow_run_id, args.workflow_run_attempt, "test_run", test_cases
args.workflow_run_id,
args.workflow_run_attempt,
"test_run",
remove_nan_inf(test_cases),
)
upload_additional_info(args.workflow_run_id, args.workflow_run_attempt, test_cases)

View File

@ -2,6 +2,7 @@ from __future__ import annotations
import decimal
import inspect
import json
import sys
import unittest
from pathlib import Path
@ -13,7 +14,7 @@ REPO_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(REPO_ROOT))
from tools.stats.upload_metrics import add_global_metric, emit_metric
from tools.stats.upload_stats_lib import BATCH_SIZE, upload_to_rockset
from tools.stats.upload_stats_lib import BATCH_SIZE, remove_nan_inf, upload_to_rockset
sys.path.remove(str(REPO_ROOT))
@ -335,6 +336,29 @@ class TestUploadStats(unittest.TestCase):
expected_number_of_requests,
)
def test_remove_nan_inf(self) -> None:
    """Check that remove_nan_inf makes NaN/inf values JSON-serializable.

    Each case is ``(value, clean, unclean)``: ``clean`` is the expected
    ``json.dumps`` output after sanitizing ``value``, and ``unclean`` is the
    expected ``json.dumps`` output of the raw ``value``. The second check
    guards the premise that the raw output contains the non-standard tokens.
    """
    checks = [
        (float("inf"), '"inf"', "Infinity"),
        (float("-inf"), '"-inf"', "-Infinity"),
        (float("nan"), '"nan"', "NaN"),
        ({1: float("inf")}, '{"1": "inf"}', '{"1": Infinity}'),
        ([float("nan")], '["nan"]', "[NaN]"),
        ((float("inf"),), '["inf"]', "[Infinity]"),
        ({1: [float("nan")]}, '{"1": ["nan"]}', '{"1": [NaN]}'),
    ]

    for value, clean, unclean in checks:
        # subTest reports every failing case instead of stopping at the first.
        with self.subTest(unclean=unclean):
            clean_output = json.dumps(remove_nan_inf(value))
            unclean_output = json.dumps(value)
            self.assertEqual(
                clean_output,
                clean,
                f"Expected {clean} when input is {unclean}, got {clean_output}",
            )
            self.assertEqual(
                unclean_output,
                unclean,
                f"Expected {unclean} when input is {unclean}, got {unclean_output}",
            )
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()