Fix sharding algo + test it (#53942)

Summary:
This PR:
1. moves the sharding algorithm from run_test.py to framework_utils.py (let me know if you have a better place for it)
2. adds tests for the algorithm in test_testing.py
3. fixes the algorithm so that it no longer tacks all the unknown jobs onto the shard with the minimum time, but instead distributes them round-robin across the shards (see the toy sketch below).
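
To make point 3 concrete, here is a toy sketch of the new round-robin step (shard contents and test names are invented for illustration; only the loop mirrors the patch below):

# Tests with no timing history used to all land on the single minimum-time
# shard; the fix deals them out round-robin, starting with the smallest shard.
unknown = ['u1', 'u2', 'u3']
shards = [(12.0, ['a']), (10.0, ['b'])]  # (total_time, tests) per shard

index = min(range(len(shards)), key=lambda i: shards[i][0])  # smallest shard first
for job in unknown:
    shards[index][1].append(job)
    index = (index + 1) % len(shards)

print(shards)  # [(12.0, ['a', 'u2']), (10.0, ['b', 'u1', 'u3'])]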

Pull Request resolved: https://github.com/pytorch/pytorch/pull/53942

Test Plan: python test/test_testing.py -k TestFrameworkUtils

Reviewed By: samestep

Differential Revision: D27047223

Pulled By: janeyx99

fbshipit-source-id: 824b20009c0bb707aa5361de445cdec795d5e3f1
Jane Xu 2021-03-15 16:31:47 -07:00 committed by Facebook GitHub Bot
parent e91aeb0470
commit ee35060888
4 changed files with 126 additions and 18 deletions

mypy-strict.ini

@@ -38,6 +38,7 @@ files = tools/codegen/gen.py,
     tools/autograd/*.py,
     tools/pyi/*.py,
     tools/test_history.py,
+    torch/testing/_internal/framework_utils.py,
     torch/testing/_internal/mypy_wrapper.py,
     torch/testing/_internal/print_test_stats.py,
     torch/utils/benchmark/utils/common.py,

test/run_test.py

@@ -16,6 +16,7 @@ import tempfile
 import torch
 from torch.utils import cpp_extension
 from torch.testing._internal.common_utils import TEST_WITH_ROCM, shell, set_cwd, FILE_SCHEMA
+from torch.testing._internal.framework_utils import calculate_shards
 import torch.distributed as dist

 from typing import Dict, Optional, Tuple, List, Any
@@ -421,25 +422,7 @@ def calculate_job_times(reports: List[Dict[str, Any]]) -> Dict[str, Tuple[float, int]]:
     return jobs_to_times

-def calculate_shards(num_shards: int, tests: List[str], job_times: Dict[str, Tuple[float, int]]) -> List[Tuple[float, List[str]]]:
-    filtered_job_times: Dict[str, float] = dict()
-    for test in tests:
-        if test in job_times:
-            avg_time, _ = job_times[test]
-            filtered_job_times[test] = avg_time
-        else:
-            filtered_job_times[test] = 0.0
-
-    # The following attempts to implement a partition approximation greedy algorithm
-    # See more at https://en.wikipedia.org/wiki/Greedy_number_partitioning
-    sorted_jobs = sorted(filtered_job_times, key=lambda j: filtered_job_times[j], reverse=True)
-    sharded_jobs: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
-    for job in sorted_jobs:
-        min_shard_index = sorted(range(num_shards), key=lambda i: sharded_jobs[i][0])[0]
-        curr_shard_time, curr_shard_jobs = sharded_jobs[min_shard_index]
-        curr_shard_jobs.append(job)
-        sharded_jobs[min_shard_index] = (curr_shard_time + filtered_job_times[job], curr_shard_jobs)
-    return sharded_jobs

 def pull_job_times_from_S3() -> Dict[str, Tuple[float, int]]:
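
To see why the removed version misbehaved: unknown tests were assigned a time of 0.0, so appending one never changed any shard total, and the same minimum-time shard kept winning the argmin and absorbing every unknown test. A standalone toy sketch of that failure mode (names and times made up, not PyTorch code):

times = {'a': 5.0, 'b': 3.0, 'u1': 0.0, 'u2': 0.0, 'u3': 0.0}  # u* are "unknown"

# Same greedy loop as the removed code: always append to the smallest shard.
shards = [(0.0, []), (0.0, [])]
for job in sorted(times, key=times.get, reverse=True):
    i = min(range(len(shards)), key=lambda k: shards[k][0])
    shards[i][1].append(job)
    shards[i] = (shards[i][0] + times[job], shards[i][1])

print(shards)  # [(5.0, ['a']), (3.0, ['b', 'u1', 'u2', 'u3'])] -- all unknowns on one shard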

test/test_testing.py

@@ -2,9 +2,11 @@ import torch
 import math
 from pathlib import PurePosixPath
+import random

 from torch.testing._internal.common_utils import \
     (TestCase, make_tensor, run_tests, slowTest)
+from torch.testing._internal.framework_utils import calculate_shards
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, onlyCUDA, onlyOnCPUAndCUDA, dtypes)
 from torch.testing._internal import mypy_wrapper
@@ -1305,5 +1307,99 @@ Added (across 1 suite) 2 tests, totaling + 3.02s
 )

+
+class TestFrameworkUtils(TestCase):
+
+    tests = [
+        'super_long_test',
+        'long_test1',
+        'long_test2',
+        'normal_test1',
+        'normal_test2',
+        'normal_test3',
+        'short_test1',
+        'short_test2',
+        'short_test3',
+        'short_test4',
+        'short_test5',
+    ]
+
+    test_times = {
+        'super_long_test': (55, 1),
+        'long_test1': (22, 2),
+        'long_test2': (18, 2),
+        'normal_test1': (9, 2),
+        'normal_test2': (7, 2),
+        'normal_test3': (5, 2),
+        'short_test1': (1, 2),
+        'short_test2': (0.6, 3),
+        'short_test3': (0.4, 5),
+        'short_test4': (0.3, 1),
+        'short_test5': (0.01, 2),
+    }
+
+    def test_calculate_2_shards_with_complete_test_times(self):
+        expected_shards = [
+            (60, ['super_long_test', 'normal_test3']),
+            (58.31, ['long_test1', 'long_test2', 'normal_test1', 'normal_test2', 'short_test1', 'short_test2',
+                     'short_test3', 'short_test4', 'short_test5'])
+        ]
+        self.assertEqual(expected_shards, calculate_shards(2, self.tests, self.test_times))
+
+    def test_calculate_5_shards_with_complete_test_times(self):
+        expected_shards = [
+            (55, ['super_long_test']),
+            (22, ['long_test1', ]),
+            (18, ['long_test2', ]),
+            (11.31, ['normal_test1', 'short_test1', 'short_test2', 'short_test3', 'short_test4', 'short_test5']),
+            (12, ['normal_test2', 'normal_test3']),
+        ]
+        self.assertEqual(expected_shards, calculate_shards(5, self.tests, self.test_times))
+
+    def test_calculate_2_shards_with_incomplete_test_times(self):
+        incomplete_test_times = {k: v for k, v in self.test_times.items() if 'test1' in k}
+        expected_shards = [
+            (22, ['long_test1', 'long_test2', 'normal_test3', 'short_test3', 'short_test5']),
+            (10, ['normal_test1', 'short_test1', 'super_long_test', 'normal_test2', 'short_test2', 'short_test4']),
+        ]
+        self.assertEqual(expected_shards, calculate_shards(2, self.tests, incomplete_test_times))
+
+    def test_calculate_5_shards_with_incomplete_test_times(self):
+        incomplete_test_times = {k: v for k, v in self.test_times.items() if 'test1' in k}
+        expected_shards = [
+            (22, ['long_test1', 'normal_test2', 'short_test5']),
+            (9, ['normal_test1', 'normal_test3']),
+            (1, ['short_test1', 'short_test2']),
+            (0, ['super_long_test', 'short_test3']),
+            (0, ['long_test2', 'short_test4']),
+        ]
+        self.assertEqual(expected_shards, calculate_shards(5, self.tests, incomplete_test_times))
+
+    def test_calculate_2_shards_against_optimal_shards(self):
+        for _ in range(100):
+            random.seed(120)
+            random_times = {k: (random.random() * 10, 1) for k in self.tests}
+            # all test times except first two
+            rest_of_tests = [i for (k, (i, _)) in random_times.items() if k != 'super_long_test' and k != 'long_test1']
+            sum_of_rest = sum(rest_of_tests)
+            random_times['super_long_test'] = (max(sum_of_rest / 2, max(rest_of_tests)), 1)
+            random_times['long_test1'] = (sum_of_rest - random_times['super_long_test'][0], 1)
+            # An optimal sharding would look like the below, but we don't need to compute this for the test:
+            # optimal_shards = [
+            #     (sum_of_rest, ['super_long_test', 'long_test1']),
+            #     (sum_of_rest, [i for i in self.tests if i != 'super_long_test' and i != 'long_test1']),
+            # ]
+            calculated_shards = calculate_shards(2, self.tests, random_times)
+            max_shard_time = max(calculated_shards[0][0], calculated_shards[1][0])
+            if sum_of_rest != 0:
+                # The calculated shard should not have a ratio worse than 7/6 for num_shards = 2
+                self.assertGreaterEqual(7.0 / 6.0, max_shard_time / sum_of_rest)
+            sorted_tests = sorted(self.tests)
+            sorted_shard_tests = sorted(calculated_shards[0][1] + calculated_shards[1][1])
+            # All the tests should be represented by some shard
+            self.assertEqual(sorted_tests, sorted_shard_tests)
+
 if __name__ == '__main__':
     run_tests()

torch/testing/_internal/framework_utils.py

@@ -0,0 +1,28 @@
+from typing import Dict, Tuple, List
+
+def calculate_shards(num_shards: int, tests: List[str], job_times: Dict[str, Tuple[float, int]]) -> List[Tuple[float, List[str]]]:
+    filtered_job_times: Dict[str, float] = dict()
+    unknown_jobs: List[str] = []
+    for test in tests:
+        if test in job_times:
+            avg_time, _ = job_times[test]
+            filtered_job_times[test] = avg_time
+        else:
+            unknown_jobs.append(test)
+
+    # The following attempts to implement a partition approximation greedy algorithm
+    # See more at https://en.wikipedia.org/wiki/Greedy_number_partitioning
+    sorted_jobs = sorted(filtered_job_times, key=lambda j: filtered_job_times[j], reverse=True)
+    sharded_jobs: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
+    for job in sorted_jobs:
+        min_shard_index = sorted(range(num_shards), key=lambda i: sharded_jobs[i][0])[0]
+        curr_shard_time, curr_shard_jobs = sharded_jobs[min_shard_index]
+        curr_shard_jobs.append(job)
+        sharded_jobs[min_shard_index] = (curr_shard_time + filtered_job_times[job], curr_shard_jobs)
+
+    # Round robin the unknown jobs starting with the smallest shard
+    index = sorted(range(num_shards), key=lambda i: sharded_jobs[i][0])[0]
+    for job in unknown_jobs:
+        sharded_jobs[index][1].append(job)
+        index = (index + 1) % num_shards
+    return sharded_jobs
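
For a quick end-to-end check of the new module, a usage sketch (toy test names and times made up; the import path matches this commit):

from torch.testing._internal.framework_utils import calculate_shards

# (average_seconds, num_reports) per test; 'new_test' has no timing history.
job_times = {'test_a': (30.0, 2), 'test_b': (20.0, 2), 'test_c': (10.0, 2)}
tests = ['test_a', 'test_b', 'test_c', 'new_test']

for total_time, shard_tests in calculate_shards(2, tests, job_times):
    print(total_time, shard_tests)
# Greedy partitioning gives 30.0 vs 30.0 (test_a alone; test_b + test_c together),
# then the unknown 'new_test' is round-robined on without changing the recorded times:
# 30.0 ['test_a', 'new_test']
# 30.0 ['test_b', 'test_c']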