mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/40251 Rather than segfaulting, we should show a good error message when in op.call<Return, Args...>(...) the Return type or Args types mismatch the kernel. This adds an assertion comparing two std::type_index to the call path, but that should be fast. Hashing the function signature is also in the call path and not strictly constexpr, but I checked on godbolt that GCC >=5 and Clang >=3.8 optimize it away and make it constexpr, i.e. it's not part of the assembly. ghstack-source-id: 106194240 Test Plan: waitforsandcastle Differential Revision: D22126701 fbshipit-source-id: 6c908a822e295757bcc0014f78f51e6a560f221f
126 lines
4.1 KiB
C++
126 lines
4.1 KiB
C++
#include <torch/extension.h>
|
|
#include <torch/library.h>
|
|
|
|
using namespace at;
|
|
|
|
static int test_int;
|
|
|
|
Tensor get_tensor(caffe2::TypeMeta dtype, IntArrayRef size) {
|
|
auto tensor_impl = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(
|
|
Storage(
|
|
Storage::use_byte_size_t(),
|
|
0,
|
|
at::DataPtr(nullptr, Device(DeviceType::MSNPU, 0)),
|
|
nullptr,
|
|
false),
|
|
DispatchKey::MSNPU,
|
|
dtype);
|
|
// This is a hack to workaround the shape checks in _convolution.
|
|
tensor_impl->set_sizes_contiguous(size);
|
|
return Tensor(std::move(tensor_impl));
|
|
}
|
|
|
|
Tensor empty_override(IntArrayRef size, const TensorOptions& options, c10::optional<c10::MemoryFormat> optional_memory_format) {
|
|
test_int = 0;
|
|
return get_tensor(options.dtype(), size);
|
|
}
|
|
|
|
Tensor add_override(const Tensor & a, const Tensor & b , Scalar c) {
|
|
test_int = 1;
|
|
return get_tensor(a.dtype(), a.sizes());
|
|
}
|
|
|
|
Tensor fake_convolution(
|
|
const Tensor& input, const Tensor& weight, const Tensor& bias,
|
|
IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation,
|
|
bool transposed, IntArrayRef output_padding, int64_t groups) {
|
|
test_int = 2;
|
|
// Only the first 2 dimension of output shape is correct.
|
|
return get_tensor(input.dtype(), {input.size(0), weight.size(0), input.size(2), input.size(3)});
|
|
}
|
|
|
|
std::tuple<Tensor,Tensor,Tensor> fake_convolution_backward(
|
|
const Tensor & grad_output, const Tensor & input, const Tensor & weight,
|
|
IntArrayRef stride, IntArrayRef padding,
|
|
IntArrayRef dilation, bool transposed, IntArrayRef output_padding,
|
|
int64_t groups, std::array<bool,3> output_mask) {
|
|
test_int = 3;
|
|
return std::tuple<Tensor, Tensor, Tensor>(
|
|
get_tensor(input.dtype(), input.sizes()),
|
|
get_tensor(weight.dtype(), weight.sizes()),
|
|
get_tensor(input.dtype(), {}));
|
|
}
|
|
|
|
TORCH_LIBRARY_IMPL(aten, MSNPU, m) {
|
|
m.impl_UNBOXED("empty.memory_format", empty_override);
|
|
m.impl_UNBOXED("add.Tensor", add_override);
|
|
m.impl_UNBOXED("convolution_overrideable", fake_convolution);
|
|
m.impl_UNBOXED("convolution_backward_overrideable", fake_convolution_backward);
|
|
}
|
|
|
|
// TODO: Extend this to exercise multi-device setting. In that case,
|
|
// we need to add a thread local variable to track the current device.
|
|
struct MSNPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
|
|
static constexpr DeviceType static_type = DeviceType::MSNPU;
|
|
MSNPUGuardImpl() {}
|
|
MSNPUGuardImpl(DeviceType t) {
|
|
AT_ASSERT(t == DeviceType::MSNPU);
|
|
}
|
|
DeviceType type() const override {
|
|
return DeviceType::MSNPU;
|
|
}
|
|
Device exchangeDevice(Device d) const override {
|
|
AT_ASSERT(d.type() == DeviceType::MSNPU);
|
|
AT_ASSERT(d.index() == 0);
|
|
return d;
|
|
}
|
|
Device getDevice() const override {
|
|
return Device(DeviceType::MSNPU, 0);
|
|
}
|
|
void setDevice(Device d) const override {
|
|
AT_ASSERT(d.type() == DeviceType::MSNPU);
|
|
AT_ASSERT(d.index() == 0);
|
|
}
|
|
void uncheckedSetDevice(Device d) const noexcept override {
|
|
}
|
|
Stream getStream(Device d) const noexcept override {
|
|
return Stream(Stream::DEFAULT, Device(DeviceType::MSNPU, 0));
|
|
}
|
|
Stream exchangeStream(Stream s) const noexcept override {
|
|
return Stream(Stream::DEFAULT, Device(DeviceType::MSNPU, 0));
|
|
}
|
|
DeviceIndex deviceCount() const noexcept override {
|
|
return 1;
|
|
}
|
|
|
|
// Event-related functions
|
|
void record(void** event,
|
|
const Stream& stream,
|
|
const DeviceIndex device_index,
|
|
const EventFlag flag) const override {
|
|
TORCH_CHECK(false, "MSNPU backend doesn't support events.");
|
|
}
|
|
void block(
|
|
void* event,
|
|
const Stream& stream) const override {
|
|
TORCH_CHECK(false, "MSNPU backend doesn't support events.");
|
|
}
|
|
bool queryEvent(void* event) const override {
|
|
TORCH_CHECK(false, "MSNPU backend doesn't support events.");
|
|
}
|
|
void destroyEvent(
|
|
void* event,
|
|
const DeviceIndex device_index) const noexcept override { }
|
|
};
|
|
|
|
constexpr DeviceType MSNPUGuardImpl::static_type;
|
|
C10_REGISTER_GUARD_IMPL(MSNPU, MSNPUGuardImpl);
|
|
|
|
int get_test_int() {
|
|
return test_int;
|
|
}
|
|
|
|
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
|
|
m.def("get_test_int", &get_test_int);
|
|
}
|