/*
 * Mirror of https://github.com/zebrajr/pytorch.git
 * (synced 2025-12-07 12:21:27 +01:00)
 *
 * NOTE(review): the text below is the last-commit summary captured by the
 * mirror (PR https://github.com/pytorch/pytorch/pull/42249, D22824329) and
 * describes CUDA-initialization error-message changes — it is unrelated to
 * this header's contents:
 *   Main change is to bring Caffe2's superior error messages for cuda
 *   initialization into c10 and use them in all code paths. Previously, the
 *   error thrown from init_cuda was very generic and the ASAN warning (if
 *   any) was buried in the logs. Other clean up changes: cache
 *   device_count() always in a static variable; move all asan macros in c10.
 *   Reviewed By: ngimel
 */
#ifndef CAFFE2_OPERATORS_INDEX_HASH_OPS_H_
#define CAFFE2_OPERATORS_INDEX_HASH_OPS_H_

#include <cstddef>
#include <cstdint>
#include <limits>
#include <utility>

#include "caffe2/core/export_caffe2_op_to_c10.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"

C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(IndexHash);
namespace caffe2 {
template <class Context>
|
|
class IndexHashOp : public Operator<Context> {
|
|
public:
|
|
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
|
template <class... Args>
|
|
explicit IndexHashOp(Args&&... args)
|
|
: Operator<Context>(std::forward<Args>(args)...),
|
|
seed_(this->template GetSingleArgument<int64_t>("seed", 0)),
|
|
modulo_(this->template GetSingleArgument<int64_t>("modulo", 0)) {
|
|
CAFFE_ENFORCE_GT(modulo_, 0, "MODULO should be > 0");
|
|
}
|
|
|
|
bool RunOnDevice() override {
|
|
return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
|
|
this, Input(INDICES));
|
|
}
|
|
|
|
template <typename T>
|
|
bool DoRunWithType() {
|
|
auto& indices = Input(INDICES);
|
|
|
|
auto* hashed_indices =
|
|
Output(HASHED_INDICES, indices.sizes(), at::dtype<T>());
|
|
|
|
CAFFE_ENFORCE_GE(
|
|
static_cast<int64_t>(std::numeric_limits<T>::max()),
|
|
modulo_,
|
|
"MODULO shouldn't be larger than the numeric limit of the indices");
|
|
|
|
auto N = indices.numel();
|
|
auto* indices_data = indices.template data<T>();
|
|
auto* hashed_indices_data = hashed_indices->template mutable_data<T>();
|
|
|
|
for (auto i = 0; i < N; i++) {
|
|
hashed_indices_data[i] = hash(indices_data[i]);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
protected:
|
|
template <typename T>
|
|
__ubsan_ignore_signed_int_overflow__
|
|
T hash(T id) {
|
|
int8_t* bytes = (int8_t*)&id;
|
|
T hashed = seed_ * 0xDEADBEEF;
|
|
for (int i = 0; i < sizeof(T) / sizeof(int8_t); i++) {
|
|
hashed = hashed * 65537 + bytes[i];
|
|
}
|
|
// We want the result of the modulo to be positive. This works under the
|
|
// assumption that modulo_ > 0 which is enforced in the constructor.
|
|
auto modHashed = hashed % modulo_;
|
|
return modHashed >= 0 ? modHashed : modHashed + modulo_;
|
|
}
|
|
|
|
private:
|
|
INPUT_TAGS(INDICES);
|
|
OUTPUT_TAGS(HASHED_INDICES);
|
|
|
|
int64_t seed_;
|
|
int64_t modulo_;
|
|
};
} // namespace caffe2

#endif // CAFFE2_OPERATORS_INDEX_HASH_OPS_H_