Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Enable all PIE rules on ruff (#165814)
This PR enables all PIE rules on ruff. Some rules from this family were already enabled; the newly added rules are:
```
PIE796 Enum contains duplicate value: {value}
PIE808 Unnecessary start argument in range
```
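As a rough illustration of the second rule (this snippet is not taken from the PR), PIE808 flags an explicit start argument of 0 in `range`, which is redundant and can be dropped without changing behaviour; the bulk of the diff below is exactly this mechanical rewrite:
```
# Flagged by PIE808: the explicit 0 start is the default and adds nothing.
for weight_idx in range(0, 3):
    print(weight_idx)

# Equivalent form after the fix.
for weight_idx in range(3):
    print(weight_idx)
```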
Pull Request resolved: https://github.com/pytorch/pytorch/pull/165814
Approved by: https://github.com/ezyang
This commit is contained in:
parent e595136187
commit c79dfdc655
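The other newly enabled rule, PIE796, does not appear in the hunks captured here. As a hypothetical sketch only, it reports an Enum member that reuses an existing value, a mistake the standard library's `enum.unique` decorator can also guard against at runtime:
```
import enum

class Color(enum.Enum):
    RED = 1
    GREEN = 2
    CRIMSON = 1  # PIE796: duplicate value; CRIMSON silently becomes an alias of RED

@enum.unique  # raises ValueError at class creation time if any value repeats
class StrictColor(enum.Enum):
    RED = 1
    GREEN = 2
```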
The diff consists of the following hunks; only the changed lines are shown for each.

@@ -85,7 +85,7 @@ class WeightOnlyInt8QuantHandler:
-for weight_idx in range(0, 3):
+for weight_idx in range(3):
@@ -204,12 +204,7 @@ select = [
-"PIE790",
-"PIE794",
-"PIE800",
-"PIE804",
-"PIE807",
-"PIE810",
+"PIE",
@@ -190,7 +190,7 @@ class TestActivationSparsifier(TestCase):
-for feature_idx in range(0, len(features)):
+for feature_idx in range(len(features)):

@@ -378,7 +378,7 @@ class TestActivationSparsifier(TestCase):
-for _ in range(0, num_data_points):
+for _ in range(num_data_points):

@@ -143,7 +143,7 @@ class TestBaseDataScheduler(TestCase):
-for _ in range(0, step_cnt):
+for _ in range(step_cnt):

@@ -123,7 +123,7 @@ class _BaseDataSparsiferTestCase(TestCase):
-for _ in range(0, step_count):
+for _ in range(step_count):

@@ -472,8 +472,8 @@ class TestNearlyDiagonalSparsifier(TestCase):
-for row in range(0, height):
+for row in range(height):
-for col in range(0, width):
+for col in range(width):
@@ -79,7 +79,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
-group = list(range(0, self.world_size))
+group = list(range(self.world_size))

@@ -94,7 +94,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
-group = list(range(0, self.world_size))
+group = list(range(self.world_size))

@@ -111,7 +111,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
-group = list(range(0, self.world_size))
+group = list(range(self.world_size))

@@ -135,7 +135,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
-group = list(range(0, self.world_size))
+group = list(range(self.world_size))

@@ -158,7 +158,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
-group = list(range(0, self.world_size))
+group = list(range(self.world_size))

@@ -181,7 +181,7 @@ if BACKEND == "gloo" or BACKEND == "nccl":
-group = list(range(0, self.world_size))
+group = list(range(self.world_size))
@@ -66,7 +66,7 @@ if TEST_WITH_DEV_DBG_ASAN:
-for idx in range(0, world_size * shards_per_rank):
+for idx in range(world_size * shards_per_rank):

@@ -45,7 +45,7 @@ if TEST_WITH_DEV_DBG_ASAN:
-for idx in range(0, world_size * shards_per_rank):
+for idx in range(world_size * shards_per_rank):

@@ -633,7 +633,7 @@ class SimpleElasticAgentTest(unittest.TestCase):
-for _ in range(0, num_restarts):
+for _ in range(num_restarts):

@@ -146,7 +146,7 @@ def echo_large(size: int) -> dict[int, str]:
-for idx in range(0, size):
+for idx in range(size):

@@ -191,7 +191,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64):
-for _ in range(0, n):
+for _ in range(n):
@@ -102,7 +102,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64):
-for i in range(0, world_size):
+for i in range(world_size):

@@ -110,7 +110,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64):
-for i in range(0, world_size):
+for i in range(world_size):

@@ -127,7 +127,7 @@ if not INVALID_PLATFORMS:
-for i in range(0, n):
+for i in range(n):

@@ -15,7 +15,7 @@ class CyclingIteratorTest(unittest.TestCase):
-return iter([stride * epoch + i for i in range(0, stride)])
+return iter([stride * epoch + i for i in range(stride)])

@@ -25,7 +25,7 @@ class CyclingIteratorTest(unittest.TestCase):
-for i in range(0, stride * max_epochs):
+for i in range(stride * max_epochs):
@@ -124,7 +124,7 @@ class TestFSDPHybridShard(FSDPTest):
-list(range(0, num_node_devices // 2)),
+list(range(num_node_devices // 2)),

@@ -175,7 +175,7 @@ class TestFSDPHybridShard(FSDPTest):
-list(range(0, num_node_devices // 2)),
+list(range(num_node_devices // 2)),

@@ -802,7 +802,7 @@ class TestLocalDTensorOps(TestDTensorOps):
-with LocalTensorMode(frozenset(range(0, self.world_size))):
+with LocalTensorMode(frozenset(range(self.world_size))):

@@ -811,7 +811,7 @@ class TestLocalDTensorOps(TestDTensorOps):
-with LocalTensorMode(frozenset(range(0, self.world_size))):
+with LocalTensorMode(frozenset(range(self.world_size))):

@@ -536,7 +536,7 @@ class DeviceMeshTestNDim(DTensorTestBase):
-list(range(0, self.world_size // 2)),
+list(range(self.world_size // 2)),
@@ -5722,11 +5722,11 @@ class TestKL(DistributionsTestCase):
-for i in range(0, n):
+for i in range(n):
-loc = [torch.randn(4) for _ in range(0, 2)]
+loc = [torch.randn(4) for _ in range(2)]
-for _ in range(0, 2)
+for _ in range(2)

@@ -5755,10 +5755,10 @@ class TestKL(DistributionsTestCase):
-loc = [torch.randn(b, 3) for _ in range(0, 2)]
+loc = [torch.randn(b, 3) for _ in range(2)]
-for _ in range(0, 2)
+for _ in range(2)

@@ -5766,7 +5766,7 @@ class TestKL(DistributionsTestCase):
-for i in range(0, b)
+for i in range(b)

@@ -5777,7 +5777,7 @@ class TestKL(DistributionsTestCase):
-loc = [torch.randn(b, 3) for _ in range(0, 2)]
+loc = [torch.randn(b, 3) for _ in range(2)]

@@ -5788,7 +5788,7 @@ class TestKL(DistributionsTestCase):
-for i in range(0, b)
+for i in range(b)

@@ -5800,15 +5800,15 @@ class TestKL(DistributionsTestCase):
-for i in range(0, n):
+for i in range(n):
-loc = [torch.randn(4) for _ in range(0, 2)]
+loc = [torch.randn(4) for _ in range(2)]
-cov_factor = [torch.randn(4, 3) for _ in range(0, 2)]
+cov_factor = [torch.randn(4, 3) for _ in range(2)]
-transform_to(constraints.positive)(torch.randn(4)) for _ in range(0, 2)
+transform_to(constraints.positive)(torch.randn(4)) for _ in range(2)
-for i in range(0, 2)
+for i in range(2)

@@ -5861,10 +5861,10 @@ class TestKL(DistributionsTestCase):
-loc = [torch.randn(b, 3) for _ in range(0, 2)]
+loc = [torch.randn(b, 3) for _ in range(2)]
-cov_factor = [torch.randn(b, 3, 2) for _ in range(0, 2)]
+cov_factor = [torch.randn(b, 3, 2) for _ in range(2)]
-transform_to(constraints.positive)(torch.randn(b, 3)) for _ in range(0, 2)
+transform_to(constraints.positive)(torch.randn(b, 3)) for _ in range(2)

@@ -5876,7 +5876,7 @@ class TestKL(DistributionsTestCase):
-for i in range(0, b)
+for i in range(b)
@@ -49,9 +49,9 @@ class ExportTests(torch._dynamo.test_case.TestCase):
-for _ in range(0, 4):
+for _ in range(4):
-for _ in range(0, 3):
+for _ in range(3):

@@ -665,9 +665,9 @@ def forward(self, x, y):
-for _ in range(0, 4):
+for _ in range(4):
-for _ in range(0, 3):
+for _ in range(3):

@@ -3627,7 +3627,7 @@ class GraphModule(torch.nn.Module):
-test(range(10), slice(None, 10, None), expected=range(0, 10))
+test(range(10), slice(None, 10, None), expected=range(10))

@@ -3047,7 +3047,7 @@ class OptimizedModuleTest(torch._dynamo.test_case.TestCase):
-for _ in range(0, 10):
+for _ in range(10):

@@ -4471,7 +4471,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
-for _ in range(0, 5):
+for _ in range(5):

@@ -4623,7 +4623,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
-for _ in range(0, 10):
+for _ in range(10):

@@ -4784,7 +4784,7 @@ class ReproTests(torch._dynamo.test_case.TestCase):
-if 3 in range(0, 10):
+if 3 in range(10):
@@ -106,7 +106,7 @@ class MemoryBudgetTest(TestCase):
-for budget in range(0, 11):
+for budget in range(11):

@@ -251,7 +251,7 @@ class MemoryBudgetTest(TestCase):
-for budget in range(0, 11):
+for budget in range(11):

@@ -1146,7 +1146,7 @@ class TestFxGraphCache(TestCase):
-return x + torch.tensor(list(range(0, 12)), device=device)
+return x + torch.tensor(list(range(12)), device=device)

@@ -1599,7 +1599,7 @@ main()
-for i in range(0, 5):
+for i in range(5):

@@ -2095,7 +2095,7 @@ class TestMaxAutotune(TestCase):
-for i in range(0, 10):
+for i in range(10):

@@ -3005,7 +3005,7 @@ class MutationTests(torch._inductor.test_case.TestCase):
-for i in range(0, BLOCK_SIZE):
+for i in range(BLOCK_SIZE):

@@ -3160,7 +3160,7 @@ class MutationTests(torch._inductor.test_case.TestCase):
-for c2 in range(0, tl.cdiv(C2, BLOCK_SIZE_C2)):
+for c2 in range(tl.cdiv(C2, BLOCK_SIZE_C2)):
@@ -32,7 +32,7 @@ class TestXNNPackBackend(unittest.TestCase):
-for _ in range(0, 20):
+for _ in range(20):

@@ -1292,7 +1292,7 @@ class TestConvolutionNN(NNTestCase):
-for i in range(0, 128):
+for i in range(128):

@@ -551,7 +551,7 @@ class TestEmbeddingNNDeviceType(NNTestCase):
-for item_pos in range(0, max_indices_per_bag):
+for item_pos in range(max_indices_per_bag):

@@ -485,7 +485,7 @@ class TestMultiheadAttentionNN(NNTestCase):
-for i in range(0, batch_size):
+for i in range(batch_size):

@@ -1135,7 +1135,7 @@ torch.cuda.synchronize()
-padding = random.sample(range(0, math.floor(kernel_size / 2) + 1), 1)
+padding = random.sample(range(math.floor(kernel_size / 2) + 1), 1)

@@ -36,12 +36,12 @@ def check_onnx_opset_operator(
-for i in range(0, len(ops)):
+for i in range(len(ops)):
-for j in range(0, len(attributes)):
+for j in range(len(attributes)):
@@ -1509,7 +1509,7 @@ class TestLRScheduler(TestCase):
-deltas = [2 * i for i in range(0, 2)]
+deltas = [2 * i for i in range(2)]

@@ -1930,7 +1930,7 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
-for i in range(0, max_gpu_count):
+for i in range(max_gpu_count):

@@ -2139,8 +2139,8 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
-for i in range(0, len(step_helper_funcs)):
+for i in range(len(step_helper_funcs)):
-for j in range(0, len(step_helper_funcs)):
+for j in range(len(step_helper_funcs)):

@@ -275,7 +275,7 @@ class TestFloat8Dtype(TestCase):
-for biased_exponent in range(0, 256):
+for biased_exponent in range(256):

@@ -3494,7 +3494,7 @@ class TestIndividualWorkerQueue(TestCase):
-for num_workers in range(0, min(6, max_num_workers)):
+for num_workers in range(min(6, max_num_workers)):
@@ -520,7 +520,7 @@ class TestIterableDataPipeBasic(TestCase):
-source_numbers = list(range(0, 10)) + [10, 12]
+source_numbers = list(range(10)) + [10, 12]

@@ -1257,7 +1257,7 @@ class TestFunctionalIterDataPipe(TestCase):
-self.assertEqual(list(range(0, 5)), output2)
+self.assertEqual(list(range(5)), output2)

@@ -1271,7 +1271,7 @@ class TestFunctionalIterDataPipe(TestCase):
-self.assertEqual(list(range(0, 5)), output2)
+self.assertEqual(list(range(5)), output2)

@@ -1385,7 +1385,7 @@ class f(torch.nn.Module):
-args = list(range(0, 3))
+args = list(range(3))

@@ -902,7 +902,7 @@ class TestIndexing(TestCase):
-indices = [range(i, i + W) for i in range(0, N - W)]
+indices = [range(i, i + W) for i in range(N - W)]

@@ -3153,7 +3153,7 @@ class TestScript(JitTestCase):
-for i in range(0, num_bailouts):
+for i in range(num_bailouts):
@@ -5950,7 +5950,7 @@ a")
-for i in range(0, x):
+for i in range(x):

@@ -10938,7 +10938,7 @@ dedent """
-for i in range(0, 3):
+for i in range(3):

@@ -11802,7 +11802,7 @@ dedent """
-for (i, (j, v), k) in zip(x, enumerate(y), range(0, 100)):
+for (i, (j, v), k) in zip(x, enumerate(y), range(100)):

@@ -243,7 +243,7 @@ class TestTEFuser(JitTestCase):
-a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu")
+a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu")

@@ -259,7 +259,7 @@ class TestTEFuser(JitTestCase):
-a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu")
+a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu")

@@ -271,7 +271,7 @@ class TestTEFuser(JitTestCase):
-a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu")
+a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu")

@@ -2234,7 +2234,7 @@ class TestTEFuser(JitTestCase):
-for i in range(0, len(indices) + 1):
+for i in range(len(indices) + 1):
@@ -231,7 +231,7 @@ class TestMatmulCuda(InductorTestCase):
-for idx in range(0, 3):
+for idx in range(3):

@@ -1900,7 +1900,7 @@ class TestMPS(TestCaseMPS):
-for dim in range(0, B_mps.dim()):
+for dim in range(B_mps.dim()):

@@ -2871,8 +2871,8 @@ class TestMPS(TestCaseMPS):
-for i in range(0, shape[0]):
+for i in range(shape[0]):
-for j in range(0, shape[1]):
+for j in range(shape[1]):

@@ -3432,12 +3432,12 @@ class TestMPS(TestCaseMPS):
-for i in range(0, n_tensors - 1):
+for i in range(n_tensors - 1):
-for i in range(0, n_tensors - 1):
+for i in range(n_tensors - 1):

@@ -4942,7 +4942,7 @@ class TestMPS(TestCaseMPS):
-for dim in range(0, 4):
+for dim in range(4):

@@ -9750,7 +9750,7 @@ class TestGatherScatter(TestCaseMPS):
-for _ in range(0, 50):
+for _ in range(50):
@@ -549,7 +549,7 @@ class NumaBindingTest(TestCase):
-set(range(0, 4)),
+set(range(4)),

@@ -677,7 +677,7 @@ class NumaBindingTest(TestCase):
-set(range(0, 2)),
+set(range(2)),

@@ -709,7 +709,7 @@ class NumaBindingTest(TestCase):
-set(range(0, 2)),
+set(range(2)),

@@ -1710,7 +1710,7 @@ class TestReductions(TestCase):
-for ndims in range(0, 4):
+for ndims in range(4):

@@ -2623,7 +2623,7 @@ class TestReductions(TestCase):
-quantiles = [tuple(np.random.rand(i)) for i in range(0, 5)]
+quantiles = [tuple(np.random.rand(i)) for i in range(5)]

@@ -295,7 +295,7 @@ class SerializationMixin:
-for i in range(0, 100):
+for i in range(100):
@@ -5300,7 +5300,7 @@ class TestSparseAny(TestCase):
-for sparse_dim_out in range(0, dense_dim):
+for sparse_dim_out in range(dense_dim):

@@ -135,7 +135,7 @@ class TestSparseCSRSampler(TestCase):
-for nnz in range(0, n_rows * n_cols + 1):
+for nnz in range(n_rows * n_cols + 1):

@@ -60,7 +60,7 @@ class MultiHeadAttentionLayer(nn.Module):
-for i in range(0, len(ln) - 1):
+for i in range(len(ln) - 1):

@@ -200,7 +200,7 @@ class TestTensorBoardPyTorchNumpy(BaseTestCase):
-ints = torch.tensor(range(0, 100)).float()
+ints = torch.tensor(range(100)).float()

@@ -1216,7 +1216,7 @@ class TestTensorExprFuser(BaseTestClass):
-for i in range(0, z):
+for i in range(z):

@@ -8424,7 +8424,7 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
-for i in range(0, 5):
+for i in range(5):
@@ -1559,7 +1559,7 @@ class TestOldViewOps(TestCase):
-for ndims in range(0, 5):
+for ndims in range(5):

@@ -1316,7 +1316,7 @@ class TestXNNPACKConv1dTransformPass(TestCase):
-padding_list = range(0, 3)
+padding_list = range(3)

@@ -1401,7 +1401,7 @@ class TestXNNPACKConv1dTransformPass(TestCase):
-padding_list = range(0, 3)
+padding_list = range(3)

@@ -147,7 +147,7 @@ def native_layer_norm_backward(
-outer_dim_indices = list(range(0, axis))
+outer_dim_indices = list(range(axis))

@@ -1248,7 +1248,7 @@ def argument_names(
-f"{fullargspec.varargs}_{i}" for i in range(0, len(args) - len(input_strs))
+f"{fullargspec.varargs}_{i}" for i in range(len(args) - len(input_strs))
@ -1538,7 +1538,7 @@ class FlattenInputOutputSignature(torch.fx.Transformer):
|
||||||
}
|
}
|
||||||
|
|
||||||
self.new_args = []
|
self.new_args = []
|
||||||
for i in range(0, len(flat_args)):
|
for i in range(len(flat_args)):
|
||||||
arg = super().placeholder(f"arg{i}", (), {})
|
arg = super().placeholder(f"arg{i}", (), {})
|
||||||
if i in matched_input_elements_to_fake:
|
if i in matched_input_elements_to_fake:
|
||||||
arg.node.meta["val"] = matched_input_elements_to_fake[i]
|
arg.node.meta["val"] = matched_input_elements_to_fake[i]
|
||||||
|
|
|
||||||
|
|
@@ -151,7 +151,7 @@ class MemoryDep(Dep):
 stride_to_index = {s: i for i, s in enumerate(self_strides)}
 order = [stride_to_index[s] for s in other_strides]
-assert OrderedSet(order) == OrderedSet(range(0, self.num_vars))
+assert OrderedSet(order) == OrderedSet(range(self.num_vars))
 return order
 def get_offset(self) -> sympy.Expr:

@@ -1787,7 +1787,7 @@ def _padding_check_valid_input(input, padding, *, dim):
 for d in range(1, input_dim):
 valid_batch_mode = valid_batch_mode and input.size(d) != 0
 else:
-for d in range(0, input_dim):
+for d in range(input_dim):
 valid_non_batch_mode = valid_non_batch_mode and input.size(d) != 0
 # allow empty batch size but not other dimensions.

@@ -1449,7 +1449,7 @@ def rollaxis(a: ArrayLike, axis, start=0):
 # numpy returns a view, here we try returning the tensor itself
 # return tensor[...]
 return a
-axes = list(range(0, n))
+axes = list(range(n))
 axes.remove(axis)
 axes.insert(start, axis)
 return a.view(axes)

@@ -4738,7 +4738,7 @@ def transpose(a: TensorLikeType, dim0: int, dim1: int) -> TensorLikeType:
 if a.ndim <= 1 or dim0 == dim1:
 return aten.alias.default(a)
-_permutation = list(range(0, a.ndim))
+_permutation = list(range(a.ndim))
 _permutation[_dim0] = _dim1
 _permutation[_dim1] = _dim0
 return torch.permute(a, _permutation)

@@ -307,7 +307,7 @@ def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=N
 _tensor_str_with_formatter(
 self[i], indent + 1, summarize, formatter1, formatter2
 )
-for i in range(0, PRINT_OPTS.edgeitems)
+for i in range(PRINT_OPTS.edgeitems)
 ]
 + ["..."]
 + [

@@ -322,7 +322,7 @@ def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=N
 _tensor_str_with_formatter(
 self[i], indent + 1, summarize, formatter1, formatter2
 )
-for i in range(0, self.size(0))
+for i in range(self.size(0))
 ]
 tensor_str = ("," + "\n" * (dim - 1) + " " * (indent + 1)).join(slices)

@@ -406,7 +406,7 @@ def get_summarized_data(self):
 if not PRINT_OPTS.edgeitems:
 return self.new_empty([0] * self.dim())
 elif self.size(0) > 2 * PRINT_OPTS.edgeitems:
-start = [self[i] for i in range(0, PRINT_OPTS.edgeitems)]
+start = [self[i] for i in range(PRINT_OPTS.edgeitems)]
 end = [self[i] for i in range(len(self) - PRINT_OPTS.edgeitems, len(self))]
 return torch.stack([get_summarized_data(x) for x in (start + end)])
 else:

@@ -28,7 +28,7 @@ def get_type_a_related_to_b(
 for s in base_name_to_sets_of_related_ops.values():
 s_list = list(s)
 # add every bidirectional pair
-for idx_0 in range(0, len(s_list)):
+for idx_0 in range(len(s_list)):
 for idx_1 in range(idx_0, len(s_list)):
 type_a_related_to_b.add((s_list[idx_0], s_list[idx_1]))
 type_a_related_to_b.add((s_list[idx_1], s_list[idx_0]))
@@ -158,9 +158,9 @@ class ActivationSparsifier:
 # data should be a list [aggregated over each feature only]
 if data is None:
 out_data = [
-0 for _ in range(0, len(features))
+0 for _ in range(len(features))
 ] # create one in case of 1st forward
-self.state[name]["mask"] = [0 for _ in range(0, len(features))]
+self.state[name]["mask"] = [0 for _ in range(len(features))]
 else:
 out_data = data # a list

@@ -336,7 +336,7 @@ class ActivationSparsifier:
 return input_data * mask
 else:
 # apply per feature, feature_dim
-for feature_idx in range(0, len(features)):
+for feature_idx in range(len(features)):
 feature = (
 torch.Tensor([features[feature_idx]])
 .long()

@@ -99,7 +99,7 @@ def sparsify_model(path_to_model, sparsified_model_dump_path):
 sparse_block_shapes (List of tuples)
 List of sparse block shapes to be sparsified on
 """
-sparsity_levels = [sl / 10 for sl in range(0, 10)]
+sparsity_levels = [sl / 10 for sl in range(10)]
 sparsity_levels += [0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0]
 norms = ["L1", "L2"]

@@ -299,7 +299,7 @@ class TestTrainingAwareCallback(TestCase):
 self._check_on_train_start(pl_module, callback, sparsifier_args, scheduler_args)
 num_epochs = 5
-for _ in range(0, num_epochs):
+for _ in range(num_epochs):
 self._check_on_train_epoch_start(pl_module, callback)
 self._simulate_update_param_model(pl_module)
 self._check_on_train_epoch_end(pl_module, callback)

@@ -53,7 +53,7 @@ class NearlyDiagonalSparsifier(base_sparsifier.BaseSparsifier):
 "nearliness cannot be larger than the dimensions of tensor."
 )
-for row in range(0, height):
+for row in range(height):
 # Bounds of entries that needs to be set to 1
 low = max(0, row - dist_to_diagonal)
 high = min(width, row + dist_to_diagonal + 1)

@@ -68,10 +68,10 @@ class APoTObserver(ObserverBase):
 p_all = []
 # create levels
-for i in range(0, self.n):
+for i in range(self.n):
 p_curr = torch.tensor([0])
-for j in range(0, (2**self.k - 2) + 1):
+for j in range((2**self.k - 2) + 1):
 curr_ele = 2 ** (-(i + j * self.n))
 p_append = torch.tensor([curr_ele])
 p_curr = torch.cat((p_curr, p_append))

@@ -1159,7 +1159,7 @@ class FakeQuantPerChannel(torch.autograd.Function):
 f"Expecting input to have dtype torch.float32, but got dtype: {input.dtype}"
 )
 assert axis < input.dim(), f"Expecting axis to be < {input.dim()}"
-broadcast_dims = list(range(0, axis)) + list(range(axis + 1, input.ndim))
+broadcast_dims = list(range(axis)) + list(range(axis + 1, input.ndim))
 unsqueeze_scales = _unsqueeze_multiple(scales, broadcast_dims)
 unsqueeze_zero_points = _unsqueeze_multiple(zero_points, broadcast_dims)
 temp = torch.round(input * (1.0 / unsqueeze_scales)) + unsqueeze_zero_points

@@ -1212,7 +1212,7 @@ class KinetoStepTracker:
 "Profiler step count has increased more than 1 - "
 f"current_step = {cls._current_step} step dict = {cls._step_dict}"
 )
-for _ in range(0, delta):
+for _ in range(delta):
 _kineto_step()
 cls._current_step = new_step
 return cls._current_step
@@ -162,7 +162,7 @@ def coalesce(layout: Layout, profile: LayoutProfile = None) -> Layout:
 assert len(layout) >= len(profile)
 return make_layout(
 chain(
-(coalesce(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type]
+(coalesce(layout[i], profile[i]) for i in range(len(profile))), # type: ignore[arg-type]
 (layout[i] for i in range(len(profile), len(layout))),
 )
 )

@@ -203,7 +203,7 @@ def filter(layout: Layout, profile: LayoutProfile = None) -> Layout:
 assert len(layout) >= len(profile)
 return make_layout(
 chain(
-(filter(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type]
+(filter(layout[i], profile[i]) for i in range(len(profile))), # type: ignore[arg-type]
 (layout[i] for i in range(len(profile), len(layout))),
 )
 )

@@ -233,7 +233,7 @@ def composition(layoutA: Layout, layoutB: LayoutInput) -> Layout:
 assert len(layoutA) >= len(layoutB)
 return make_layout(
 chain(
-(composition(layoutA[i], layoutB[i]) for i in range(0, len(layoutB))), # type: ignore[arg-type]
+(composition(layoutA[i], layoutB[i]) for i in range(len(layoutB))), # type: ignore[arg-type]
 (layoutA[i] for i in range(len(layoutB), len(layoutA))),
 )
 )

@@ -371,7 +371,7 @@ def logical_divide(layoutA: Layout, layoutB: LayoutInput) -> Layout:
 chain(
 (
 logical_divide(layoutA[i], layoutB[i]) # type: ignore[arg-type]
-for i in range(0, len(layoutB))
+for i in range(len(layoutB))
 ),
 (layoutA[i] for i in range(len(layoutB), len(layoutA))),
 )

@@ -396,7 +396,7 @@ def logical_product(layoutA: Layout, layoutB: LayoutInput) -> Layout:
 chain(
 (
 logical_product(layoutA[i], layoutB[i]) # type: ignore[arg-type]
-for i in range(0, len(layoutB))
+for i in range(len(layoutB))
 ),
 (layoutA[i] for i in range(len(layoutB), len(layoutA))),
 )

@@ -421,14 +421,14 @@ def hier_unzip(
 # A layout with shape ((A,a),(B,b),(C,c))
 split = make_layout(
 hier_unzip(splitter, layoutA[i], layoutB[i]) # type: ignore[arg-type]
-for i in range(0, len(layoutB))
+for i in range(len(layoutB))
 )
 # Gather to shape ((A,B,C,...),(a,b,c,...,y,z))
 return make_layout(
-make_layout(split[i][0] for i in range(0, len(layoutB))), # type: ignore[arg-type]
+make_layout(split[i][0] for i in range(len(layoutB))), # type: ignore[arg-type]
 make_layout(
 chain( # type: ignore[arg-type]
-(split[i][1] for i in range(0, len(layoutB))),
+(split[i][1] for i in range(len(layoutB))),
 (layoutA[i] for i in range(len(layoutB), len(layoutA))),
 )
 ),
@@ -1671,7 +1671,7 @@ def _low_contention_all_gather(
 local_buf.copy_(tensor)
 # pull
 symm_mem.barrier()
-for step in range(0, world_size):
+for step in range(world_size):
 remote_rank = (rank - step) % world_size
 src_buf = symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype)
 chunks[remote_rank].copy_(src_buf)

@@ -1706,7 +1706,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input(
 with _get_backend_stream():
 # pull + offline reduction
 symm_mem.barrier()
-for step in range(0, world_size):
+for step in range(world_size):
 remote_rank = (rank - step) % world_size
 src_buf = symm_mem.get_buffer(
 remote_rank,

@@ -1743,7 +1743,7 @@ def _low_contention_reduce_scatter_with_workspace(
 with _get_backend_stream():
 # push + offline reduction
 workspace.barrier()
-for step in range(0, world_size):
+for step in range(world_size):
 remote_rank = (rank - step) % world_size
 dst_buf = workspace.get_buffer(
 remote_rank, chunks[0].shape, chunks[0].dtype, chunks[0].numel() * rank

@@ -727,7 +727,7 @@ class MultiprocessContext(PContext):
 # pipe. Hence to prevent deadlocks on large return values,
 # we opportunistically try queue.get on each join call
 # See: https://docs.python.org/2/library/multiprocessing.html#all-platforms
-for local_rank in range(0, self.nprocs):
+for local_rank in range(self.nprocs):
 return_queue = self._ret_vals[local_rank]
 if not return_queue.empty():
 # save the return values temporarily into a member var

@@ -59,7 +59,7 @@ class MultiprocessingRequestQueue(RequestQueue):
 def get(self, size, timeout: float) -> list[TimerRequest]:
 requests = []
 wait = timeout
-for _ in range(0, size):
+for _ in range(size):
 start = time.time()
 try:

@@ -107,7 +107,7 @@ class DTensorSpec:
 # follow default left-to-right device order if shard_order is not specified
 tensor_dim_to_mesh_dims: defaultdict[int, list[int]] = defaultdict(list)
 mesh_ndim = len(placements)
-for mesh_dim in range(0, mesh_ndim):
+for mesh_dim in range(mesh_ndim):
 # shard_order doesn't work with _StridedShard
 if isinstance(placements[mesh_dim], _StridedShard):
 return ()

@@ -306,7 +306,7 @@ def _all_gather_dtensor(
 placements = list(copy.deepcopy(tensor.placements))
 # FSDP + TP: [Shard(0), tp_placement] -> [Replicate(), tp_placement]
 # HSDP + TP: [Replicate(), Shard(0), tp_placement] -> [Replicate(), Replicate(), tp_placement]
-for i in range(0, len(placements) - 1):
+for i in range(len(placements) - 1):
 placements[i] = Replicate()
 tensor = tensor.redistribute(
 device_mesh=tensor.device_mesh,
@@ -1112,7 +1112,7 @@ def chunk_default(func, *args, **kwargs):
 # the input number; it can be counter-intuitive, but it matches dense behavior.
 return [
 NestedTensor(values=chunk_values[i], **(nested_kwargs[i]))
-for i in range(0, len(chunk_values))
+for i in range(len(chunk_values))
 ]
 else:
 return [

@@ -1005,7 +1005,7 @@ def _interpolate_size_to_scales(g: jit_utils.GraphContext, input, output_size, d
 if i < 2
 else float(output_size[-(dim - i)])
 / float(input.type().sizes()[-(dim - i)])
-for i in range(0, dim)
+for i in range(dim)
 ]
 scales = g.op(
 "Constant", value_t=torch.tensor(scales_constant, dtype=torch.float32)

@@ -331,7 +331,7 @@ def unfold(g: jit_utils.GraphContext, input, dimension, size, step):
 ndim = symbolic_helper._get_tensor_rank(input)
 assert ndim is not None
-perm = list(range(0, ndim))
+perm = list(range(ndim))
 perm.append(perm.pop(dimension))
 unsqueeze_list = []

@@ -116,7 +116,7 @@ def _interpolate(name, dim, interpolate_mode):
 if i < 2
 else float(output_size[-(dim - i)])
 / float(input.type().sizes()[-(dim - i)])
-for i in range(0, dim)
+for i in range(dim)
 ]
 return g.op("Upsample", input, mode_s=interpolate_mode, scales_f=scales)

@@ -840,7 +840,7 @@ def t(g: jit_utils.GraphContext, self):
 def numpy_T(g: jit_utils.GraphContext, input):
 ndim = symbolic_helper._get_tensor_rank(input)
 assert ndim is not None
-perm = list(reversed(range(0, ndim)))
+perm = list(reversed(range(ndim)))
 return g.op("Transpose", input, perm_i=perm)

@@ -990,7 +990,7 @@ def transpose(g: jit_utils.GraphContext, self, dim0, dim1):
 @_onnx_symbolic("aten::permute")
 @symbolic_helper.parse_args("v", "is")
 def permute(g: jit_utils.GraphContext, self, dims):
-if dims == list(range(0, len(dims))):
+if dims == list(range(len(dims))):
 return self
 return g.op("Transpose", self, perm_i=dims)

@@ -1368,7 +1368,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
 )
 ceiled_output_dim = [
 math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])) + 1
-for i in range(0, len(padding))
+for i in range(len(padding))
 ]
 # ensure last pooling starts inside
 ceiled_output_dim = [

@@ -1377,7 +1377,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
 if (((ceiled_output_dim[i] - 1) * stride[i]) >= (dim[i] + padding[i]))
 else ceiled_output_dim[i]
 )
-for i in range(0, len(ceiled_output_dim))
+for i in range(len(ceiled_output_dim))
 ]
 padding_ceil = [
 (

@@ -1392,7 +1392,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
 )
 )
 )
-for i in range(0, len(padding))
+for i in range(len(padding))
 ]
 # ensure padding is not > kernel_size
 padding_ceil = [

@@ -1405,7 +1405,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
 if ((padding_ceil[i] + 2 * padding[i]) >= (kernel_size[i]))
 else int(padding_ceil[i])
 )
-for i in range(0, len(padding_ceil))
+for i in range(len(padding_ceil))
 ]
 return padding_ceil
@@ -1697,14 +1697,14 @@ def _adaptive_pool(name, type, tuple_fn, fn=None):
 name, "input size not accessible", input
 )
 # verify if output size % input size = 0 for all dim
-mod = [dim[i] % output_size[i] for i in range(0, len(dim))]
+mod = [dim[i] % output_size[i] for i in range(len(dim))]
 if mod != [0] * len(mod):
 if output_size == [1] * len(output_size):
 return g.op("GlobalMaxPool", input), None
 return symbolic_helper._unimplemented(
 name, "output size that are not factor of input size", output_size_value
 )
-k = [int(dim[i] / output_size[i]) for i in range(0, len(dim))]
+k = [int(dim[i] / output_size[i]) for i in range(len(dim))]
 # call max_poolxd_with_indices to get indices in the output
 if type == "MaxPool":
 # pyrefly: ignore # not-callable

@@ -2906,7 +2906,7 @@ def unfold(g: jit_utils.GraphContext, input, dimension, size, step):
 for low, hi in zip(low_indices, hi_indices)
 ]
 ndim = len(sizes)
-perm = list(range(0, ndim))
+perm = list(range(ndim))
 perm.append(perm.pop(dimension))
 unsqueeze = [
 symbolic_helper._unsqueeze_helper(

@@ -11615,7 +11615,7 @@ def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=Fal
 # numpy searchsorted only supports 1D inputs so we split up ND inputs
 orig_shape = boundary.shape
 num_splits = np.prod(sorted_sequence.shape[:-1])
-splits = range(0, num_splits)
+splits = range(num_splits)
 sorted_sequence, boundary = sorted_sequence.reshape(num_splits, -1), boundary.reshape(num_splits, -1)
 if sorter is not None:
 sorter = sorter.reshape(num_splits, -1)

@@ -16258,7 +16258,7 @@ op_db: list[OpInfo] = [
 aten_backward_name='_prelu_kernel_backward',
 ref=lambda x, weight:
 np.maximum(0., x) + np.minimum(0., x) *
-(weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(0, x.ndim)])),
+(weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(x.ndim)])),
 dtypes=floating_types_and(torch.bfloat16, torch.float16),
 supports_forward_ad=True,
 supports_fwgrad_bwgrad=True,

@@ -2896,7 +2896,7 @@ def _multilabelmarginloss_reference(input, target):
 sum = 0
 for target_index in targets:
-for i in range(0, len(input)):
+for i in range(len(input)):
 if i not in targets:
 sum += max(0, 1 - input[target_index] + input[i])

@@ -2914,7 +2914,7 @@ def multilabelmarginloss_reference(input, target, reduction='mean'):
 n = input.size(0)
 dim = input.size(1)
 output = input.new(n).zero_()
-for i in range(0, n):
+for i in range(n):
 output[i] = _multilabelmarginloss_reference(input[i], target[i])
 if reduction == 'mean':

@@ -2955,7 +2955,7 @@ def _multimarginloss_reference(input, target_idx, p, margin, weight):
 weight = input.new(len(input)).fill_(1)
 output = 0
-for i in range(0, len(input)):
+for i in range(len(input)):
 if i != target_idx:
 output += weight[target_idx] * (max(0, (margin - input[target_idx] + input[i])) ** p)
 return output

@@ -2972,7 +2972,7 @@ def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reducti
 n = input.size(0)
 dim = input.size(1)
 output = input.new(n)
-for x in range(0, n):
+for x in range(n):
 output[x] = _multimarginloss_reference(input[x], target[x], p, margin, weight)
 if reduction == 'mean':

@@ -2987,7 +2987,7 @@ def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reducti
 def cosineembeddingloss_reference(input1, input2, target, margin=0, reduction='mean'):
 def _cos(a, b):
 cos = a.new(a.size(0))
-for i in range(0, a.size(0)):
+for i in range(a.size(0)):
 cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) * ((b[i] * b[i]).sum() + 1e-12)) ** 0.5)
 return cos
@@ -705,7 +705,7 @@ class LocalDTensorTestBase(DTensorTestBase):
 self.skipTest(msg)
 def _get_local_tensor_mode(self):
-return LocalTensorMode(frozenset(range(0, self.world_size)))
+return LocalTensorMode(frozenset(range(self.world_size)))
 def setUp(self) -> None:
 super().setUp()

@@ -658,13 +658,13 @@ class DistributedTest:
 return (group, group_id, rank)
 def _init_full_group_test(self, **kwargs):
-group = list(range(0, dist.get_world_size()))
+group = list(range(dist.get_world_size()))
 group_id = dist.new_group(**kwargs)
 rank = dist.get_rank()
 return (group, group_id, rank)
 def _init_global_test(self):
-group = list(range(0, dist.get_world_size()))
+group = list(range(dist.get_world_size()))
 group_id = dist.group.WORLD
 rank = dist.get_rank()
 return (group, group_id, rank)

@@ -1114,7 +1114,7 @@ class DistributedTest:
 averager = averagers.PeriodicModelAverager(
 period=period, warmup_steps=warmup_steps
 )
-for step in range(0, 20):
+for step in range(20):
 # Reset the parameters at every step.
 param.data = copy.deepcopy(tensor)
 for params in model.parameters():

@@ -1143,7 +1143,7 @@ class DistributedTest:
 averager = averagers.PeriodicModelAverager(
 period=period, warmup_steps=warmup_steps
 )
-for step in range(0, 20):
+for step in range(20):
 # Reset the parameters at every step.
 for param_group in opt.param_groups:
 for params in param_group["params"]:

@@ -1203,7 +1203,7 @@ class DistributedTest:
 averager = averagers.PeriodicModelAverager(
 period=period, warmup_steps=warmup_steps
 )
-for step in range(0, 20):
+for step in range(20):
 # Reset the parameters at every step.
 param.data = copy.deepcopy(tensor)
 for params in model.parameters():

@@ -1284,7 +1284,7 @@ class DistributedTest:
 expected_global_avg_tensor = (
 torch.ones_like(param.data) * sum(range(world_size)) / world_size
 )
-for step in range(0, 25):
+for step in range(25):
 # Reset the parameters at every step.
 param.data = copy.deepcopy(tensor)
 for params in model.parameters():

@@ -1390,7 +1390,7 @@ class DistributedTest:
 for val in ["1", "0"]:
 os.environ["TORCH_NCCL_BLOCKING_WAIT"] = val
-for src in range(0, world_size):
+for src in range(world_size):
 send_tensor = _build_tensor(rank + 1, device_id=device_id).fill_(
 src
 )

@@ -1409,7 +1409,7 @@ class DistributedTest:
 for req in reqs:
 req.wait()
-for src in range(0, world_size):
+for src in range(world_size):
 self.assertEqual(recv_tensors[src], expected_tensors[src])
 self._barrier()

@@ -1505,7 +1505,7 @@ class DistributedTest:
 rank = dist.get_rank()
 p2p_op_list = []
-for src in range(0, dist.get_world_size()):
+for src in range(dist.get_world_size()):
 if src == rank:
 continue
 send_tensor = _build_tensor(rank + 1)

@@ -1528,7 +1528,7 @@ class DistributedTest:
 rank = dist.get_rank()
 p2p_op_list = []
-for src in range(0, dist.get_world_size()):
+for src in range(dist.get_world_size()):
 if src == rank:
 continue
 send_tensor = _build_tensor(rank + 1)
@@ -1602,10 +1602,10 @@ class DistributedTest:
 tensor = _build_tensor(rank + 1, device_id=device_id)
 profiler_cls = profiler_ctx if profiler_ctx is not None else nullcontext()
 with profiler_cls as prof:
-for src in range(0, world_size):
+for src in range(world_size):
 if src == rank:
 # Send mode
-for dst in range(0, world_size):
+for dst in range(world_size):
 if dst == rank:
 continue
 dist.send(tensor, dst)

@@ -1674,10 +1674,10 @@ class DistributedTest:
 tensor = _build_tensor(send_size)
 ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
 with ctx as prof:
-for src in range(0, dist.get_world_size()):
+for src in range(dist.get_world_size()):
 if src == rank:
 # Send mode
-for dst in range(0, dist.get_world_size()):
+for dst in range(dist.get_world_size()):
 if dst == rank:
 continue
 dist.send(tensor, dst)

@@ -1742,10 +1742,10 @@ class DistributedTest:
 ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
 with ctx as prof:
-for dst in range(0, dist.get_world_size()):
+for dst in range(dist.get_world_size()):
 if dst == rank:
 # Recv mode
-for dst in range(0, dist.get_world_size()):
+for dst in range(dist.get_world_size()):
 if dst == rank:
 continue

@@ -1846,10 +1846,10 @@ class DistributedTest:
 tensor = _build_tensor(send_recv_size, value=rank)
 ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
 with ctx as prof:
-for dst in range(0, world_size):
+for dst in range(world_size):
 if dst == rank:
 # Recv mode
-for src in range(0, world_size):
+for src in range(world_size):
 if src == rank:
 continue
 output_tensor = _build_tensor(send_recv_size, value=-1)

@@ -7480,7 +7480,7 @@ class DistributedTest:
 for baseline_iter in baseline_num_iters:
 for offset in iteration_offsets:
 mapping = dict.fromkeys(
-range(0, num_early_join_ranks), baseline_iter
+range(num_early_join_ranks), baseline_iter
 )
 # if num_early_join_ranks > 1, ranks > 0 that will join early
 # iterate offset//2 more times than rank 0, to test nodes

@@ -166,7 +166,7 @@ class AllReduce:
 # collect all data to the list and make them
 # all on rank 0 device
 tensors = [
-data[src_rank][i].to(rank_0_device) for src_rank in range(0, len(data))
+data[src_rank][i].to(rank_0_device) for src_rank in range(len(data))
 ]
 # now mimic reduce across all ranks

@@ -266,7 +266,7 @@ class CommonDistAutogradTest(RpcAgentTestFixture):
 grads = dist_autograd.get_gradients(context_id)
 nargs = len(args)
 ngrads = 0
-for i in range(0, nargs):
+for i in range(nargs):
 if local_grads[i] is not None:
 self.assertIn(args[i], grads)
 self.assertEqual(local_grads[i], grads[args[i]])

@@ -1973,7 +1973,7 @@ class DistAutogradTest(CommonDistAutogradTest):
 DistAutogradTest._test_clean_context_backward_context_id = context_id
 # Send the context id to all nodes.
-for i in range(0, self.world_size):
+for i in range(self.world_size):
 if i != self.rank:
 rank_distance = (i - self.rank + self.world_size) % self.world_size
 rpc.rpc_sync(

@@ -1988,7 +1988,7 @@ class DistAutogradTest(CommonDistAutogradTest):
 self.assertEqual(self.world_size - 1, len(known_context_ids))
 t1 = torch.rand((3, 3), requires_grad=True)
-for i in range(0, 100):
+for i in range(100):
 dst = self._next_rank()
 t1 = rpc.rpc_sync(worker_name(dst), torch.add, args=(t1, t1))
@@ -1818,7 +1818,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
 # Spawn multiple threads that send RPCs to ensure keys are correctly
 # prefixed when there are multiple RPCs being created/in flight at the
 # same time.
-dst_ranks = [rank for rank in range(0, self.world_size) if rank != self.rank]
+dst_ranks = [rank for rank in range(self.world_size) if rank != self.rank]
 def rpc_with_profiling(dst_worker):
 with _profile() as prof:

@@ -1884,7 +1884,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
 if self.rank != 1:
 return
-dst_ranks = [rank for rank in range(0, self.world_size) if rank != self.rank]
+dst_ranks = [rank for rank in range(self.world_size) if rank != self.rank]
 for dst in dst_ranks:
 dst_worker = worker_name(dst)
 with _profile() as prof:

@@ -439,7 +439,7 @@ class JitTestCase(JitCommonTestCase):
 state = model.get_debug_state()
 plan = get_execution_plan(state)
 num_bailouts = plan.code.num_bailouts()
-for i in range(0, num_bailouts):
+for i in range(num_bailouts):
 plan.code.request_bailout(i)
 bailout_outputs = model(*inputs)
 self.assertEqual(bailout_outputs, expected)

@@ -912,7 +912,7 @@ if has_triton():
 b_ptrs = b_ptr + (offs_k[:, None] + offs_bn[None, :])
 accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
-for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
+for k in range(tl.cdiv(K, BLOCK_SIZE_K)):
 a = tl.load(a_ptrs, mask=offs_k[None, :] < K - k * BLOCK_SIZE_K, other=0.0)
 b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0)
 accumulator = tl.dot(a, b, accumulator)