pytorch/torch/csrc/distributed/c10d/exception.h
Tristan Rice 98c892749b c10d/Store: add nonblocking mode to queue_pop (#151485)
This adds a non-blocking mode to queue_pop, which lets workers poll for ready work without blocking the main loop. This is useful when you want to keep a GPU at maximum utilization while items arrive on the queue only periodically.

We also expose a `torch.distributed.QueueEmptyError` so users can catch the error and handle it accordingly.

Test plan:

```
pytest test/distributed/test_store.py -k queue -v -s -x
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151485
Approved by: https://github.com/fduwjj, https://github.com/tianfengfrank
2025-04-18 02:14:50 +00:00

40 lines
1.3 KiB
C++

// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
// Throws an exception of type `::c10d::err_type`.
//
// This is similar to C10_THROW_ERROR. The major difference is that this macro
// handles exception types defined in the c10d namespace, whereas
// C10_THROW_ERROR requires the exception to be defined in the c10 namespace.
//
// The exception is constructed from two arguments:
//   1. a braced source location made of the call site's `__func__`,
//      `__FILE__` and `__LINE__` (the latter cast to uint32_t), and
//   2. the message returned by `::c10::str` for the remaining macro arguments.
//
// Both namespaces are written with a leading `::` so that the expansion does
// not depend on what `c10` / `c10d` happen to name at the point of use.
#define C10D_THROW_ERROR(err_type, ...)                      \
  throw ::c10d::err_type(                                    \
      {__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
      ::c10::str(__VA_ARGS__))
// Throws `::c10d::error_t` (through C10D_THROW_ERROR) when `cond` is false and
// does nothing otherwise.
//
// The trailing arguments are combined with `::c10::str` and handed to
// TORCH_CHECK_MSG (together with `cond` and an empty second argument) to
// produce the exception message. `::c10::str` is globally qualified so the
// expansion resolves the same helper regardless of the enclosing scope.
//
// NOTE: this expands to a bare `if (...) { ... }` statement, not to a single
// expression. Do not use it as the unbraced body of an `if` that has an `else`
// branch: the `else` would either fail to compile or attach to the `if`
// introduced by this macro.
#define C10D_CHECK_WITH(error_t, cond, ...)                           \
  if (C10_UNLIKELY_OR_CONST(!(cond))) {                               \
    C10D_THROW_ERROR(                                                 \
        error_t, TORCH_CHECK_MSG(cond, "", ::c10::str(__VA_ARGS__))); \
  }
namespace c10d {

// Re-export the distributed error types declared in c10 so that they can be
// named as `c10d::<Type>`. C10D_THROW_ERROR / C10D_CHECK_WITH look their
// exception type up in `::c10d`, so a c10 error can only be thrown through
// those macros if it is re-exported here.
using c10::DistNetworkError;
// Error for a non-blocking queue pop on an empty queue.
// NOTE(review): declared in c10/util/Exception.h, which this header cannot
// show — confirm the name against that header.
using c10::DistQueueEmptyError;
using c10::DistStoreError;

// A DistNetworkError subtype that inherits every constructor of its base.
// Going by its name it reports socket-level failures; the sites that throw it
// live outside this header.
class TORCH_API SocketError : public DistNetworkError {
  using DistNetworkError::DistNetworkError;
};

// A DistNetworkError subtype that inherits every constructor of its base.
// Going by its name it reports timeouts; the sites that throw it live outside
// this header.
class TORCH_API TimeoutError : public DistNetworkError {
  using DistNetworkError::DistNetworkError;
};

} // namespace c10d