pytorch/torch/csrc/distributed/c10d/HashStore.hpp
Tristan Rice 4b2ae2ac33 c10d: add Collectives abstraction (#125978)
This adds a new `Collectives` API for performing distributed collective operations. It is intended to replace the [current Elastic store abstraction](https://github.com/pytorch/pytorch/blob/main/torch/distributed/elastic/utils/store.py) with more performant and debuggable primitives.

Design doc: https://docs.google.com/document/d/147KcKJXEHvk1Q6tISLbJVvLejHg_1kIhBQeu-8RQxhY/edit

The standard implementation is `StoreCollectives`; other, more performant backends will be added in a follow-up PR.

Test plan:

```
python test/distributed/test_collectives.py -v
```

This tests both multi-threaded functionality and timeout behavior.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/125978
Approved by: https://github.com/shuqiangzhang
2024-05-17 05:09:11 +00:00

60 lines
1.5 KiB
C++

#pragma once
#include <condition_variable>
#include <mutex>
#include <unordered_map>
#include <torch/csrc/distributed/c10d/Store.hpp>
namespace c10d {
/// A `Store` whose entries are held in a `std::unordered_map` member
/// (`map_`), mapping string keys to byte vectors.
///
/// NOTE(review): every member except the untimed `wait` is defined out of
/// line, so this header only shows the declared interface. Behavioral details
/// (blocking, error reporting, locking discipline) should be confirmed against
/// the implementation file and the `Store` base class.
class TORCH_API HashStore : public Store {
 public:
  ~HashStore() override = default;

  /// `Store::set` override: takes a key and the bytes supplied for it.
  void set(const std::string& key, const std::vector<uint8_t>& data) override;

  /// `Store::compareSet` override: takes a key plus an expected and a desired
  /// byte vector, and returns a byte vector.
  std::vector<uint8_t> compareSet(
      const std::string& key,
      const std::vector<uint8_t>& expectedValue,
      const std::vector<uint8_t>& desiredValue) override;

  /// `Store::get` override: returns the bytes for `key`.
  std::vector<uint8_t> get(const std::string& key) override;

  /// Untimed wait: forwards to the timed overload below, passing `timeout_`
  /// as the timeout. `timeout_` is not declared in this class; presumably it
  /// is inherited from `Store` (confirm there).
  void wait(const std::vector<std::string>& keys) override {
    this->wait(keys, timeout_);
  }

  /// Timed wait on `keys` with an explicit `timeout` in milliseconds.
  void wait(
      const std::vector<std::string>& keys,
      const std::chrono::milliseconds& timeout) override;

  /// `Store::add` override: takes a key and a signed 64-bit `value`, and
  /// returns a signed 64-bit result.
  int64_t add(const std::string& key, int64_t value) override;

  /// `Store::getNumKeys` override: returns a key count as a signed 64-bit
  /// integer.
  int64_t getNumKeys() override;

  /// `Store::check` override: takes a list of keys and returns a boolean.
  bool check(const std::vector<std::string>& keys) override;

  /// `Store::deleteKey` override: takes a key and returns a boolean.
  bool deleteKey(const std::string& key) override;

  /// Extended API: append `value` for `key`.
  void append(const std::string& key, const std::vector<uint8_t>& value)
      override;

  /// Extended API: batched read; returns one byte vector per requested key
  /// (presumably in the order of `keys` -- confirm in the implementation).
  std::vector<std::vector<uint8_t>> multiGet(
      const std::vector<std::string>& keys) override;

  /// Extended API: batched write of `keys` and `values` (presumably paired by
  /// index -- confirm in the implementation).
  void multiSet(
      const std::vector<std::string>& keys,
      const std::vector<std::vector<uint8_t>>& values) override;

  /// Reports whether this store supports the extended API, i.e. `append`,
  /// `multiGet` and `multiSet`.
  bool hasExtendedApi() const override;

 protected:
  // Key -> value bytes.
  std::unordered_map<std::string, std::vector<uint8_t>> map_;
  // NOTE(review): presumably `m_` guards `map_` and `cv_` wakes threads
  // blocked in `wait`/`get`; their usage is not visible from this header.
  std::mutex m_;
  std::condition_variable cv_;
};
} // namespace c10d