[BE] Add sharding data by default to metrics (#110035)
Extend the metric library to allow setting global metrics at the process level, which will always be emitted. The current use case is to include shard information every time a metric is emitted by run_test.py.

### 🤖 Generated by Copilot at 0cae92c

> _`run_test` refactored_
> _Sharding metrics in Rockset_
> _Autumn of testing_

Pull Request resolved: https://github.com/pytorch/pytorch/pull/110035
Approved by: https://github.com/clee2000
parent d91492a7a4
commit 1277d0e834
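In short, a process registers key/value pairs once and they ride along with every subsequent metric emission. A minimal sketch of the usage pattern, assuming the module layout from this commit; the metric name and values are illustrative, borrowed from the new tests rather than real CI data:

```python
# Sketch of the usage pattern this commit enables; the metric name and
# values are illustrative, not real CI data.
from tools.stats.upload_metrics import add_global_metric, emit_metric

# Register process-level context once (run_test.py does this for shard
# info right after parsing its CLI options).
add_global_metric("shard", 1)
add_global_metric("num_shards", 4)

# Every later emission now carries shard/num_shards automatically; a key
# passed directly to emit_metric wins if the names collide.
emit_metric("metric_name", {"some_number": 123})
```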
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -13,7 +13,7 @@ import sys
 import tempfile
 import time
 from datetime import datetime
-from typing import Any, cast, Dict, List, NamedTuple, Optional, Union
+from typing import Any, cast, Dict, List, NamedTuple, Optional, Tuple, Union
 
 import pkg_resources
 
@@ -40,7 +40,7 @@ REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
 # using tools/ to optimize test run.
 sys.path.insert(0, str(REPO_ROOT))
 from tools.stats.export_test_times import TEST_TIMES_FILE
-from tools.stats.upload_metrics import emit_metric
+from tools.stats.upload_metrics import add_global_metric, emit_metric
 from tools.testing.target_determination.determinator import (
     AggregatedHeuristics,
     get_test_prioritizations,
@@ -1438,12 +1438,7 @@ def download_test_times(file: str = TEST_TIMES_FILE) -> Dict[str, float]:
     return test_times_file["default"]["default"]
 
 
-def do_sharding(
-    options,
-    selected_tests: List[str],
-    test_file_times: Dict[str, float],
-    sort_by_time: bool = True,
-) -> List[ShardedTest]:
+def get_sharding_opts(options) -> Tuple[int, int]:
     which_shard, num_shards = 1, 1
     if options.shard:
         assert len(options.shard) == 2, "Unexpected shard format"
@@ -1453,6 +1448,17 @@ def do_sharding(
         which_shard <= num_shards
     ), "Selected shard must be less than or equal to total number of shards"
 
+    return (which_shard, num_shards)
+
+
+def do_sharding(
+    options,
+    selected_tests: List[str],
+    test_file_times: Dict[str, float],
+    sort_by_time: bool = True,
+) -> List[ShardedTest]:
+    which_shard, num_shards = get_sharding_opts(options)
+
     # Do sharding
     shards = calculate_shards(
         num_shards,
@@ -1616,6 +1622,11 @@ def main():
 
     options = parse_args()
 
+    # Include sharding info in all metrics
+    which_shard, num_shards = get_sharding_opts(options)
+    add_global_metric("shard", which_shard)
+    add_global_metric("num_shards", num_shards)
+
     test_directory = str(REPO_ROOT / "test")
     selected_tests = get_selected_tests(options)
 
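For reference, the extracted helper can be exercised on its own. The sketch below is an assumption-laden reconstruction: the diff elides part of the function body, so the `which_shard, num_shards = options.shard` unpacking is inferred from the surrounding asserts, and an argparse `Namespace` stands in for the parsed CLI options:

```python
from argparse import Namespace
from typing import Tuple

def get_sharding_opts(options) -> Tuple[int, int]:
    # Default: a single shard running everything.
    which_shard, num_shards = 1, 1
    if options.shard:
        assert len(options.shard) == 2, "Unexpected shard format"
        # Assumed unpacking; this line falls in a part of the function
        # the diff does not show.
        which_shard, num_shards = options.shard
        assert (
            which_shard <= num_shards
        ), "Selected shard must be less than or equal to total number of shards"
    return (which_shard, num_shards)

print(get_sharding_opts(Namespace(shard=(2, 4))))  # (2, 4)
print(get_sharding_opts(Namespace(shard=None)))    # (1, 1)
```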
--- a/tools/stats/upload_metrics.py
+++ b/tools/stats/upload_metrics.py
@@ -59,6 +59,18 @@ class EnvVarMetric:
         return value
 
 
+global_metrics: Dict[str, Any] = {}
+
+
+def add_global_metric(metric_name: str, metric_value: Any) -> None:
+    """
+    Adds stats that should be emitted with every metric by the current process.
+    If the emit_metrics method specifies a metric with the same name, it will
+    overwrite this value.
+    """
+    global_metrics[metric_name] = metric_value
+
+
 def emit_metric(
     metric_name: str,
     metrics: Dict[str, Any],
@@ -83,6 +95,10 @@ def emit_metric(
     if metrics is None:
         raise ValueError("You didn't ask to upload any metrics!")
 
+    # Merge the given metrics with the global metrics, overwriting any duplicates
+    # with the given metrics.
+    metrics = {**global_metrics, **metrics}
+
     # We use these env vars that to determine basic info about the workflow run.
     # By using env vars, we don't have to pass this info around to every function.
     # It also helps ensure that we only emit metrics during CI
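The one-line merge leans on Python's dict-unpacking rule that later unpacks overwrite earlier ones on duplicate keys, which is exactly the override behavior the `add_global_metric` docstring promises. A tiny illustration:

```python
# Later unpacks win on duplicate keys, so per-call metrics override globals.
global_metrics = {"shard": 1, "num_shards": 4}
metrics = {"some_number": 123, "shard": 99}

merged = {**global_metrics, **metrics}
print(merged)  # {'shard': 99, 'num_shards': 4, 'some_number': 123}
```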
--- a/tools/test/test_upload_stats_lib.py
+++ b/tools/test/test_upload_stats_lib.py
@@ -4,7 +4,7 @@ import unittest
 from typing import Any, Dict
 from unittest import mock
 
-from tools.stats.upload_metrics import emit_metric
+from tools.stats.upload_metrics import add_global_metric, emit_metric
 
 from tools.stats.upload_stats_lib import BATCH_SIZE, upload_to_rockset
 
@@ -85,6 +85,76 @@ class TestUploadStats(unittest.TestCase):
             {**emit_should_include, **emitted_metric},
         )
 
+    @mock.patch("boto3.Session.resource")
+    def test_when_global_metric_specified_then_it_emits_it(
+        self, mock_resource: Any
+    ) -> None:
+        metric = {
+            "some_number": 123,
+        }
+
+        global_metric_name = "global_metric"
+        global_metric_value = "global_value"
+
+        add_global_metric(global_metric_name, global_metric_value)
+
+        emit_should_include = {
+            **metric,
+            global_metric_name: global_metric_value,
+        }
+
+        # Preserve the metric emitted
+        emitted_metric: Dict[str, Any] = {}
+
+        def mock_put_item(Item: Dict[str, Any]) -> None:
+            nonlocal emitted_metric
+            emitted_metric = Item
+
+        mock_resource.return_value.Table.return_value.put_item = mock_put_item
+
+        emit_metric("metric_name", metric)
+
+        self.assertEqual(
+            emitted_metric,
+            {**emitted_metric, **emit_should_include},
+        )
+
+    @mock.patch("boto3.Session.resource")
+    def test_when_local_and_global_metric_specified_then_global_is_overridden(
+        self, mock_resource: Any
+    ) -> None:
+        global_metric_name = "global_metric"
+        global_metric_value = "global_value"
+        local_override = "local_override"
+
+        add_global_metric(global_metric_name, global_metric_value)
+
+        metric = {
+            "some_number": 123,
+            global_metric_name: local_override,
+        }
+
+        emit_should_include = {
+            **metric,
+            global_metric_name: local_override,
+        }
+
+        # Preserve the metric emitted
+        emitted_metric: Dict[str, Any] = {}
+
+        def mock_put_item(Item: Dict[str, Any]) -> None:
+            nonlocal emitted_metric
+            emitted_metric = Item
+
+        mock_resource.return_value.Table.return_value.put_item = mock_put_item
+
+        emit_metric("metric_name", metric)
+
+        self.assertEqual(
+            emitted_metric,
+            {**emitted_metric, **emit_should_include},
+        )
+
     @mock.patch("boto3.Session.resource")
     def test_when_optional_envvar_set_to_actual_value_then_emit_vars_emits_it(
         self, mock_resource: Any
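One detail worth noting in these tests: the assertion `assertEqual(emitted_metric, {**emitted_metric, **emit_should_include})` is a subset check rather than exact equality. It passes only if every expected key/value pair already appears in the emitted dict, which leaves room for the extra fields emit_metric derives from environment variables. A small self-contained demonstration of the idiom (the `extra_field` key is hypothetical):

```python
# x == {**x, **y} holds iff every key/value pair of y is already in x.
emitted = {"some_number": 123, "shard": 1, "extra_field": "added elsewhere"}

assert emitted == {**emitted, **{"some_number": 123, "shard": 1}}  # subset present: passes
assert emitted != {**emitted, **{"shard": 2}}                      # mismatched value: not a subset
```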