#include <c10/core/TensorImpl.h>

#include <c10/core/CopyBytes.h>
#include <c10/core/InferenceMode.h>
#include <c10/core/SymIntArrayRef.h>
#include <c10/core/impl/LocalDispatchKeySet.h>
#include <c10/core/impl/PyInterpreter.h>
#include <c10/util/Logging.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>

C10_DEFINE_bool(
    caffe2_keep_on_shrink,
    true,
    "If set, keeps memory when a tensor is shrinking its size.");

C10_DEFINE_int64(
    caffe2_max_keep_on_shrink_memory,
    LLONG_MAX,
    "The maximum memory in bytes to keep on shrink, if the difference between "
    "tensor sizes is bigger than this then tensor will be reset.");

namespace c10 {

const char* const TensorImpl::err_msg_tensor_metadata_change_not_allowed =
    "is not allowed on a Tensor created from .data or .detach().\n"
    "If your intent is to change the metadata of a Tensor (such as sizes / strides / storage / storage_offset)\n"
    "without autograd tracking the change, remove the .data / .detach() call and wrap the change in a `with torch.no_grad():` block.\n"
    "For example, change:\n"
    "    x.data.set_(y)\n"
    "to:\n"
    "    with torch.no_grad():\n"
    "        x.set_(y)";

at::Tensor& TensorImpl::mutable_grad() {
  if (!autograd_meta_)
    autograd_meta_ = impl::GetAutogradMetaFactory()->make();
  return autograd_meta_->mutable_grad();
}

const at::Tensor& TensorImpl::grad() const {
  // Yes, I know this looks really weird. But I don't really have a choice as
  // long as this function returns a const reference to Tensor. I'm not
  // really sure how I would have designed this API differently, but it
  // is not so easy to fix right now because the mutable counterpart of
  // this function must keep working so that "x.grad() = ..." keeps working
  // (part of public API).
  if (!autograd_meta_)
    return impl::GetAutogradMetaFactory()->undefined_tensor();
  return autograd_meta_->grad();
}

const at::Tensor& TensorImpl::_fw_grad(
    uint64_t level,
    const at::TensorBase& self) const {
  // See TensorImpl::grad() above for explanation about the line below
  if (!autograd_meta_)
    return impl::GetAutogradMetaFactory()->undefined_tensor();
  return autograd_meta_->fw_grad(level, self);
}

void TensorImpl::_set_fw_grad(
    const at::TensorBase& new_grad,
    const at::TensorBase& self,
    uint64_t level,
    bool is_inplace_op) {
  if (!autograd_meta_)
    autograd_meta_ = impl::GetAutogradMetaFactory()->make();
  autograd_meta_->set_fw_grad(new_grad, self, level, is_inplace_op);
}

TensorImpl::~TensorImpl() {
  destroy_pyobj_if_needed();
}

TensorImpl::TensorImpl(
    Storage&& storage,
    DispatchKeySet key_set,
    const caffe2::TypeMeta data_type)
    // Use std::forward to suppress static analyzer false positive.
    : TensorImpl(
          std::forward<Storage>(storage),
          key_set,
          data_type,
          storage.device()) {}

// [Note: Python key removal]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// In most constructors for TensorImpl, you will see Python and
// PythonTLSSnapshot keys are removed from the passed in DispatchKeySet. Why?
//
// INVARIANT: Python and PythonTLSSnapshot dispatch keys are set iff the
// PyObject for the Tensor has a nontrivial __torch_dispatch__ implementation.
//
// When a fresh TensorImpl is created, there is *no* PyObject (this only gets
// initialized lazily at the first point in time the Tensor passes into
// Python). So we would violate the invariant.
//
// In practice, what will happen shortly afterwards is that the TensorImpl
// will get its PyObject initialized by Tensor._make_subclass; at this point
// the Python and PythonTLSSnapshot dispatch keys will be set and all is well.
// The point is to delay the dispatch key setting until that point.
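// A minimal sketch of what the note above describes (illustrative only,
// assuming some caller-supplied `key_set`):
//
//   DispatchKeySet key_set = ...;          // may contain Python keys
//   key_set = key_set - c10::python_ks;    // constructor strips them
//   // ... later, Tensor._make_subclass attaches the PyObject and re-adds
//   // Python / PythonTLSSnapshot, restoring the invariant.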
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
TensorImpl::TensorImpl(
    ImplType type,
    Storage&& storage,
    DispatchKeySet key_set,
    const caffe2::TypeMeta data_type)
    : storage_(std::move(storage)),
      pyobj_interpreter_(nullptr),
      pyobj_(nullptr),
      storage_offset_(0),
      numel_(0),
      data_type_(data_type),
      device_opt_(storage_.device()),
      key_set_(key_set - c10::python_ks) { // See [Note: Python key removal]
  init_bitfields();
  // Inference tensor doesn't have version counter.
  if (!is_inference()) {
    version_counter_ = VariableVersion(/*version=*/0);
  }
}

TensorImpl::TensorImpl(
    DispatchKeySet key_set,
    const caffe2::TypeMeta data_type,
    c10::optional<c10::Device> device_opt)
    // NOLINTNEXTLINE(performance-move-const-arg)
    : TensorImpl({}, key_set, data_type, std::move(device_opt)) {}

// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
TensorImpl::TensorImpl(
    Storage&& storage,
    DispatchKeySet key_set,
    const caffe2::TypeMeta data_type,
    c10::optional<c10::Device> device_opt)
    : storage_(std::move(storage)),
      pyobj_interpreter_(nullptr),
      pyobj_(nullptr),
      storage_offset_(0),
      numel_(0),
      data_type_(data_type),
      device_opt_(device_opt) {
  init_bitfields();

  if (!key_set.empty()) {
    TORCH_INTERNAL_ASSERT(
        data_type == ScalarType::Undefined || device_opt_.has_value());
    // UndefinedTensorImpl is a singleton, so we skip logging it
    C10_LOG_API_USAGE_ONCE("tensor.create");
  }

  bool inference_mode = c10::InferenceMode::is_enabled();

  // TODO: be more explicit about the full key set at call sites so we
  // don't have to keep recomputing it here
  auto k = key_set.highestBackendKey();

  key_set = key_set | getAutocastRelatedKeySetFromBackend(k);

  // See [Note: Python key removal]
  key_set = key_set - c10::python_ks;

  // Inference tensor doesn't have autograd related keys.
  if (inference_mode) {
    // See Note [Expected TLS state in InferenceMode] for why we exclude
    // Autograd & ADInplaceOrView keys. Normally key_set only contains backend
    // keys, but we do the subtraction here to make sure.
    key_set_ = key_set - c10::autograd_dispatch_keyset_with_ADInplaceOrView;
  } else {
    // TODO: Ideally we only add AutogradBackend key when the tensor requires
    // grad.
    // See Note [Dream: skip VariableType kernel when requires_grad=false]
    key_set_ = key_set | getAutogradRelatedKeySetFromBackend(k);
  }

  // Inference tensor doesn't have version counter.
  if (!is_inference()) {
    version_counter_ = VariableVersion(/*version=*/0);
  }
  // We would also like to check that non-CPU devices have an index, but some
  // Caffe2 operators create Storages with default devices.
}
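// Illustrative sketch of the inference-mode branch above (names such as
// `at::ones` are assumed from the ATen frontend, not defined in this file):
//
//   {
//     c10::InferenceMode guard;
//     auto t = at::ones({2, 2});
//     // t is an inference tensor: its key set carries no Autograd /
//     // ADInplaceOrView keys and it has no version counter.
//   }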
void TensorImpl::HandleResize() {
  // If needed, we will free the data. The next mutable_data() call
  // will create the data storage.
  bool reset_tensor = false;
  if (reserved_) {
    // If tensor is reserved then don't claim its memory unless nbytes()
    // is smaller than new size
    reset_tensor =
        storage_.nbytes() < (storage_offset_ + numel_) * data_type_.itemsize();
  } else {
    reset_tensor = storage_.nbytes() <
            (storage_offset_ + numel_) * data_type_.itemsize() ||
        !FLAGS_caffe2_keep_on_shrink ||
        storage_.nbytes() -
                (storage_offset_ + numel_) * data_type_.itemsize() >
            static_cast<size_t>(FLAGS_caffe2_max_keep_on_shrink_memory);
  }

  if (reset_tensor && storage_initialized()) {
    FreeMemory();
  }
}

bool TensorImpl::compute_contiguous() const {
  bool is_contiguous = true;
  if (is_empty())
    return is_contiguous;
  int64_t z = 1;
  for (int64_t d = dim() - 1; d >= 0; d--) {
    const auto size_d =
        sizes_and_strides_.size_at_unchecked(d).as_int_unchecked();
    if (size_d != 1) {
      if (sizes_and_strides_.stride_at_unchecked(d).as_int_unchecked() == z) {
        z *= size_d;
      } else {
        is_contiguous = false;
        break;
      }
    }
  }
  return is_contiguous;
}

bool TensorImpl::compute_channels_last_contiguous_2d() const {
  // Please don't combine these branches; the constant array is used here to
  // let the compiler fully unroll the loop to get better performance.
  switch (sizes_and_strides_.size()) {
    case 4: {
      int64_t expected = 1;
      for (auto& d : {1, 3, 2, 0}) {
        const auto size_d =
            sizes_and_strides_.size_at_unchecked(d).as_int_unchecked();
        if (size_d != 1) {
          if (sizes_and_strides_.stride_at_unchecked(d).as_int_unchecked() !=
              expected) {
            return false;
          }
          expected *= size_d;
        }
      }
      return true;
    }
    // NOLINTNEXTLINE(bugprone-branch-clone)
    case 3:
      // TODO dim == 3 case will be enabled once it is fully tested
      return false;
    default:
      return false;
  }
}

bool TensorImpl::compute_channels_last_contiguous_3d() const {
  // Please don't combine these branches; the constant array is used here to
  // let the compiler fully unroll the loop to get better performance.
  switch (sizes_and_strides_.size()) {
    case 5: {
      int64_t expected = 1;
      for (auto& d : {1, 4, 3, 2, 0}) {
        const auto size_d =
            sizes_and_strides_.size_at_unchecked(d).as_int_unchecked();
        if (size_d != 1) {
          if (sizes_and_strides_.stride_at_unchecked(d).as_int_unchecked() !=
              expected) {
            return false;
          }
          expected *= size_d;
        }
      }
      return true;
    }
    // NOLINTNEXTLINE(bugprone-branch-clone)
    case 4:
      // TODO dim == 4 case will be enabled once it is fully tested
      return false;
    default:
      return false;
  }
}

bool TensorImpl::compute_strides_like_channels_last_2d() const {
  return is_channels_last_strides_2d(
      TensorImpl::sizes(), TensorImpl::strides());
}

bool TensorImpl::compute_strides_like_channels_last_3d() const {
  return is_channels_last_strides_3d(
      TensorImpl::sizes(), TensorImpl::strides());
}
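// Worked example for the channels-last checks above (illustrative): a 4-D
// tensor with sizes (N, C, H, W) = (2, 3, 4, 5) is channels-last (NHWC)
// contiguous when its strides are
//
//   stride[1] (C) = 1
//   stride[3] (W) = C         = 3
//   stride[2] (H) = W * C     = 15
//   stride[0] (N) = H * W * C = 60
//
// which is exactly the order {1, 3, 2, 0} walked by the loop, with `expected`
// accumulating the product of the sizes seen so far.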
bool TensorImpl::compute_non_overlapping_and_dense() const {
  if (dim() == 1) {
    return sizes_and_strides_.size_at_unchecked(0) < 2 ||
        sizes_and_strides_.stride_at_unchecked(0) == 1;
  }
  SmallVector<int64_t, 5> perm;
  perm.resize(dim());
  for (const auto i : c10::irange(dim())) {
    perm[i] = i;
  }
  // Sort by strides, leaving 0 and 1 sized dims at the end of the array
  std::sort(perm.begin(), perm.end(), [&](int64_t a, int64_t b) {
    if (sizes_and_strides_.size_at_unchecked(a) < 2) {
      return false;
    } else if (sizes_and_strides_.size_at_unchecked(b) < 2) {
      return true;
    }
    return sizes_and_strides_.stride_at_unchecked(a) <
        sizes_and_strides_.stride_at_unchecked(b);
  });
  SymInt require_stride = 1;
  for (const auto i : c10::irange(dim())) {
    const auto size_perm_i = sizes_and_strides_.size_at_unchecked(perm[i]);
    if (size_perm_i < 2) {
      return true;
    }
    if (sizes_and_strides_.stride_at_unchecked(perm[i]) != require_stride) {
      return false;
    }
    require_stride *= size_perm_i;
  }
  return true;
}

void TensorImpl::release_resources() {
  autograd_meta_.reset();
  if (storage_) {
    storage_ = {};
  }
  destroy_pyobj_if_needed();
}

void TensorImpl::destroy_pyobj_if_needed() {
  if (owns_pyobj()) {
    TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr);
    TORCH_INTERNAL_ASSERT(pyobj_ != nullptr);
    pyobj_interpreter_.load(std::memory_order_acquire)
        ->decref(_unchecked_untagged_pyobj(), /*is_tensor*/ true);
    // NB: this destructor can only be entered when there are no
    // references to this C++ object (obviously), NOR any references
    // to the PyObject (if there are references to the PyObject,
    // then the PyObject holds an owning reference to the tensor).
    // So it is OK to clear pyobj_ here as it is impossible for it to
    // be used again (modulo weak reference races)
    pyobj_ = nullptr; // for safety
  }
}

#ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
bool TensorImpl::has_storage() const {
  return storage_;
}
#endif

void TensorImpl::throw_storage_access_error() const {
  TORCH_CHECK_NOT_IMPLEMENTED(
      false, "Cannot access storage of ", tensorimpl_type_name());
}

impl::PyInterpreter* TensorImpl::load_pyobj_interpreter() const {
  auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire);
  if (interpreter) {
    return interpreter;
  }
  TORCH_CHECK(
      false,
      "cannot access PyObject for Tensor on interpreter ",
      pyobj_interpreter_.load()->name());
}

bool TensorImpl::is_contiguous_custom(at::MemoryFormat memory_format) const {
  if (is_python_dispatch()) {
    return load_pyobj_interpreter()->is_contiguous(this);
  }
  TORCH_CHECK(
      false,
      "Tensors of type ",
      tensorimpl_type_name(),
      " do not have is_contiguous");
}

IntArrayRef TensorImpl::sizes_custom() const {
  if (is_python_dispatch()) {
    return load_pyobj_interpreter()->sizes(this);
  }
  TORCH_CHECK(
      false, "Tensors of type ", tensorimpl_type_name(), " do not have sizes");
}

c10::SymIntArrayRef TensorImpl::sym_sizes_custom() const {
  TORCH_CHECK(
      false,
      "Tensors of type ",
      tensorimpl_type_name(),
      " do not have sym sizes");
}

c10::Device TensorImpl::device_custom() const {
  if (is_python_dispatch()) {
    return load_pyobj_interpreter()->device(this);
  }
  TORCH_CHECK(
      false, "Tensors of type ", tensorimpl_type_name(), " do not have device");
}

IntArrayRef TensorImpl::strides_custom() const {
  if (is_python_dispatch()) {
    return load_pyobj_interpreter()->strides(this);
  }
  TORCH_CHECK(
      false,
      "Tensors of type ",
      tensorimpl_type_name(),
      " do not have strides");
}

int64_t TensorImpl::dim_custom() const {
  if (is_python_dispatch()) {
    return load_pyobj_interpreter()->dim(this);
  }
  TORCH_CHECK(
      false, "Tensors of type ", tensorimpl_type_name(), " do not have dim");
}

int64_t TensorImpl::numel_custom() const {
  TORCH_CHECK(
      false, "Tensors of type ", tensorimpl_type_name(), " do not have numel");
}

static void deletePlacementDeleteContext(void* ptr) {
  delete static_cast<PlacementDeleteContext*>(ptr);
}

at::DataPtr PlacementDeleteContext::makeDataPtr(
    at::DataPtr&& data_ptr,
    PlacementDtor placement_dtor,
    size_t size,
    at::Device device) {
  auto* ptr = data_ptr.get();
  return {
      ptr,
      new PlacementDeleteContext(std::move(data_ptr), placement_dtor, size),
      &deletePlacementDeleteContext,
      device};
}
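// Rough usage sketch for makeDataPtr (illustrative; `allocator`, `meta`, `n`
// and `device` are assumed to exist in the caller):
//
//   at::DataPtr raw = allocator->allocate(n * meta.itemsize());
//   at::DataPtr guarded = PlacementDeleteContext::makeDataPtr(
//       std::move(raw), meta.placementDelete(), n, device);
//   // When `guarded` is released, the placement destructor runs over the
//   // `n` elements before the underlying allocation is freed.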
AutogradMetaInterface::~AutogradMetaInterface() = default;

// Setting requires_grad to true on an inference tensor outside InferenceMode
// is forbidden. Ideally it would also be illegal inside InferenceMode.
// But there's no way that we can directly allocate a tensor to have
// requires_grad = true in a C++ constructor, so set_requires_grad is widely
// used in the C++ frontend. Forbidding it inside InferenceMode would force
// users to delete these setter calls in their code, which is not ideal.
void TensorImpl::set_requires_grad(bool requires_grad) {
  TORCH_CHECK(
      !(requires_grad && is_inference() && !c10::InferenceMode::is_enabled()),
      "Setting requires_grad=True on inference tensor outside InferenceMode is not allowed.");
  if (!requires_grad && !autograd_meta_)
    return;
  if (!autograd_meta_)
    autograd_meta_ = impl::GetAutogradMetaFactory()->make();
  // NB: In principle, setting requires_grad to false could result in
  // the AutogradMeta becoming equal to a default constructed state,
  // in which case we could apply the nullptr AutogradMeta optimization
  // (see autograd_meta_ docs). But we don't do this right now. Note
  // that it is unsound to unconditionally reset AutogradMeta to nullptr
  // when you set requires_grad to false, as there may be nontrivial
  // information content in the other fields; for example, we may
  // have set the string name for a Variable, or there may be hooks
  // registered for it.
  autograd_meta_->set_requires_grad(requires_grad, this);
}

bool TensorImpl::requires_grad() const {
  if (!autograd_meta_)
    return false;
  return autograd_meta_->requires_grad();
}

void TensorImpl::set_autograd_meta(
    std::unique_ptr<c10::AutogradMetaInterface> autograd_meta) {
  // NB: autograd_meta may be null! That just means it's the default
  // constructor
  autograd_meta_ = std::move(autograd_meta);
}

c10::AutogradMetaInterface* TensorImpl::autograd_meta() const {
  // NB: Might return null!
  return autograd_meta_.get();
}

template <typename VariableVersion>
c10::intrusive_ptr<TensorImpl> TensorImpl::shallow_copy_and_detach_core(
    VariableVersion&& version_counter,
    bool allow_tensor_metadata_change) const {
  if (key_set_.has(DispatchKey::Python) &&
      !c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
    auto r = pyobj_interpreter_.load(std::memory_order_acquire)->detach(this);
    if (r) {
      r->set_version_counter(std::forward<VariableVersion>(version_counter));
      r->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
      return r;
    }
    // otherwise just copy the TensorImpl and not the PyObject. Since
    // the interpreter is dead no one can call us out on it
  }
  auto impl = c10::make_intrusive<TensorImpl>(
      // No need to populate Storage; copy_tensor_metadata will do it for us.
      key_set_,
      data_type_,
      device_opt_);
  copy_tensor_metadata(
      /*src_impl=*/this,
      /*dest_impl=*/impl.get(),
      /*version_counter=*/std::forward<VariableVersion>(version_counter),
      /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
  impl->refresh_numel();
  impl->refresh_contiguous();
  return impl;
}

c10::intrusive_ptr<TensorImpl> TensorImpl::shallow_copy_and_detach(
    const c10::VariableVersion& version_counter,
    bool allow_tensor_metadata_change) const {
  return shallow_copy_and_detach_core(
      version_counter, allow_tensor_metadata_change);
}

c10::intrusive_ptr<TensorImpl> TensorImpl::shallow_copy_and_detach(
    c10::VariableVersion&& version_counter,
    bool allow_tensor_metadata_change) const {
  return shallow_copy_and_detach_core(
      std::move(version_counter), allow_tensor_metadata_change);
}
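// Rough sketch of how the overloads above get used (illustrative only; `t` is
// assumed to be a c10::intrusive_ptr<TensorImpl>):
//
//   auto detached = t->shallow_copy_and_detach(
//       /*version_counter=*/t->version_counter(),
//       /*allow_tensor_metadata_change=*/false);
//   // `detached` shares t's storage but has its own TensorImpl metadata and
//   // carries no autograd_meta_.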
// This function copies all of the metadata from the src tensor except for:
// - key_set_
// - storage_
// - storage_access_should_throw_
// - sizes_strides_policy_
// - version_counter_
// - allow_tensor_metadata_change_
// The idea is that if we have a "wrapper tensor" (like in functionalization),
// all of the above are properties that the wrapper will want to customize,
// while everything else should be mirrored between the wrapper and the inner
// tensor.
void TensorImpl::copy_generic_tensor_metadata(
    const TensorImpl* src_impl,
    TensorImpl* dest_impl) {
  dest_impl->sizes_and_strides_ = src_impl->sizes_and_strides_;
  dest_impl->storage_offset_ = src_impl->storage_offset_;
  dest_impl->data_type_ = src_impl->data_type_;
  dest_impl->device_opt_ = src_impl->device_opt_;
  dest_impl->is_contiguous_ = src_impl->is_contiguous_;
  dest_impl->is_channels_last_contiguous_ =
      src_impl->is_channels_last_contiguous_;
  dest_impl->is_channels_last_3d_contiguous_ =
      src_impl->is_channels_last_3d_contiguous_;
  dest_impl->is_channels_last_ = src_impl->is_channels_last_;
  dest_impl->is_channels_last_3d_ = src_impl->is_channels_last_3d_;
  dest_impl->is_non_overlapping_and_dense_ =
      src_impl->is_non_overlapping_and_dense_;
  dest_impl->is_wrapped_number_ = src_impl->is_wrapped_number_;
  dest_impl->reserved_ = src_impl->reserved_;
  if (src_impl->named_tensor_meta_ != nullptr) {
    dest_impl->named_tensor_meta_ = src_impl->named_tensor_meta_->clone();
  }
  dest_impl->sizes_strides_policy_ = src_impl->sizes_strides_policy_;
}

void TensorImpl::copy_tensor_metadata_except_version_counter(
    const TensorImpl* src_impl,
    TensorImpl* dest_impl,
    bool allow_tensor_metadata_change) {
  // First call the generic copy function
  copy_generic_tensor_metadata(src_impl, dest_impl);
  // Then copy everything else (see the comment at copy_generic_tensor_metadata
  // for the list of metadata that it does not directly copy).
  dest_impl->storage_ = src_impl->storage_;
  // Copying tensor metadata doesn't change the PyObject (maybe
  // it should), which means that we have to preserve whatever the
  // original Python keyset was (as it's associated with the PyObject
  // being a tensor subclass or not)
  dest_impl->key_set_ = (src_impl->key_set_ - c10::python_ks) |
      (dest_impl->key_set_ & c10::python_ks);
  dest_impl->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
  dest_impl->storage_access_should_throw_ =
      src_impl->storage_access_should_throw_;
}
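// Worked example of the key_set splice above (illustrative key sets):
//
//   src_impl->key_set_  = {CPU, AutogradCPU}
//   dest_impl->key_set_ = {CPU, Python, PythonTLSSnapshot}
//
//   (src - python_ks) | (dest & python_ks)
//     = {CPU, AutogradCPU} | {Python, PythonTLSSnapshot}
//     = {CPU, AutogradCPU, Python, PythonTLSSnapshot}
//
// i.e. everything comes from the source except the Python-related keys, which
// stay tied to the destination's PyObject.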
void TensorImpl::copy_tensor_metadata(
    const TensorImpl* src_impl,
    TensorImpl* dest_impl,
    const c10::VariableVersion& version_counter,
    bool allow_tensor_metadata_change) {
  copy_tensor_metadata_except_version_counter(
      src_impl, dest_impl, allow_tensor_metadata_change);
  // TODO: In the ideal end state, it's okay to set disabled version_counter
  // on inference tensor since it's a no-op. This requires refactor on call
  // sites.
  if (!dest_impl->is_inference()) {
    dest_impl->set_version_counter(version_counter);
  }
}

void TensorImpl::copy_tensor_metadata(
    const TensorImpl* src_impl,
    TensorImpl* dest_impl,
    c10::VariableVersion&& version_counter,
    bool allow_tensor_metadata_change) {
  copy_tensor_metadata_except_version_counter(
      src_impl, dest_impl, allow_tensor_metadata_change);
  if (!dest_impl->is_inference()) {
    dest_impl->set_version_counter(std::move(version_counter));
  }
}

// Legacy Caffe2 operations

void TensorImpl::Extend(int64_t num, float growthPct) {
  TORCH_CHECK(sizes_and_strides_.size() >= 1u);
  TORCH_CHECK(num >= 0, "`num` must be non-negative for Extend");
  TORCH_CHECK(
      is_contiguous_,
      "Right now Extend is only supported for contiguous Tensor.");
  TORCH_CHECK(
      !has_symbolic_sizes_strides_,
      "Extend() called on tensor with symbolic shape")

  using SizesVector = SmallVector<int64_t, 5>;
  IntArrayRef sizes_and_strides =
      asIntArrayRefUnchecked(sizes_and_strides_.sizes_arrayref());
  SizesVector newDims(sizes_and_strides.begin(), sizes_and_strides.end());
  newDims[0] += num;
  if (!storage_.data()) {
    Resize(newDims);
    return;
  }
  const auto newNumel = c10::multiply_integers(newDims.begin(), newDims.end());
  if (newNumel * data_type_.itemsize() <= storage_.nbytes()) {
    sizes_and_strides_.set_sizes(SymIntArrayRef::fromIntArrayRef(newDims));
    numel_ = newNumel;
    return;
  }
  SizesVector newCapacity(sizes_and_strides.begin(), sizes_and_strides.end());
  newCapacity[0] = std::max(
      newDims[0],
      static_cast<int64_t>(std::ceil(
          sizes_and_strides_.size_at_unchecked(0).as_int_unchecked() *
          (1 + growthPct / 100))));
  auto oldData = std::move(storage_.data_ptr());
  auto oldSize = numel_;
  Resize(newCapacity);
  auto* newData = raw_mutable_data(data_type_);
  if (data_type_.copy()) {
    TORCH_CHECK(
        device_type() == DeviceType::CPU, "non-POD types work only on CPU");
    data_type_.copy()(oldData.get(), newData, oldSize);
  } else {
    // The following copy uses the current (thread local) stream for copying
    // and also takes the GPU id from the device() field passed in.
    //
    // TODO: Potentially more enforcements are necessary to avoid accidental
    // switch to sync copy if the currently set device is wrong.
    //
    // Specifically, we might need to switch to a different context device
    // here explicitly to avoid relying on user synchronizing things
    // properly.
    CopyBytes(
        oldSize * itemsize(),
        oldData.get(),
        device(),
        newData,
        device(),
        true); // non-blocking
  }
  reserved_ = true;
  sizes_and_strides_.set_sizes(SymIntArrayRef::fromIntArrayRef(newDims));
  numel_ = newNumel;
}
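// Illustrative example of Extend's amortized growth (a sketch; `t` is assumed
// to be a caffe2-style tensor of shape (10, 4) with allocated storage):
//
//   t.Extend(6, /*growthPct=*/50.f);
//   // Logical shape becomes (16, 4). If the existing storage is too small,
//   // capacity is grown to max(16, ceil(10 * 1.5)) = 16 rows along dim 0 and
//   // the old rows are copied into the new allocation.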
void TensorImpl::ReserveSpace(int64_t outer_dim) {
  TORCH_CHECK(
      is_contiguous_,
      "Right now ReserveSpace is only supported for contiguous Tensor.");
  TORCH_CHECK(
      !has_symbolic_sizes_strides_,
      "ReserveSpace() called on tensor with symbolic shape")
  TORCH_CHECK(storage_.unique(), "Can't call ReserveSpace on shared storage.");
  // TODO: eliminate newCapacity.
  IntArrayRef sizes_and_strides =
      asIntArrayRefUnchecked(sizes_and_strides_.sizes_arrayref());
  SmallVector<int64_t, 5> newCapacity(
      sizes_and_strides.begin(), sizes_and_strides.end());
  newCapacity[0] = outer_dim;
  auto newNumel = c10::multiply_integers(newCapacity);
  if (newNumel * data_type_.itemsize() <= storage_.nbytes()) {
    return;
  }
  // Old data is discarded
  storage_.data_ptr().clear();
  auto oldSize = numel_;
  SmallVector<int64_t, 5> oldDims(
      sizes_and_strides.begin(), sizes_and_strides.end());
  Resize(newCapacity);
  // Allocate new memory but don't copy over the data
  raw_mutable_data(data_type_);
  sizes_and_strides_.set_sizes(SymIntArrayRef::fromIntArrayRef(oldDims));
  numel_ = oldSize;
  reserved_ = true;
}

void TensorImpl::Reshape(const std::vector<int64_t>& dims) {
  TORCH_CHECK(
      is_contiguous_,
      "Right now Reshape is only supported for contiguous Tensor.");
  TORCH_CHECK(
      !has_symbolic_sizes_strides_,
      "Reshape() called on tensor with symbolic shape")

  int64_t new_size = 1;
  for (auto d : dims) {
    TORCH_CHECK(d >= 0);
    new_size *= d;
  }
  TORCH_CHECK(
      new_size == numel_,
      "New size and old size are not equal. You cannot use Reshape, "
      "but should use Resize."
      // TODO(jiayq): remove the following warning after pending diffs
      // stabilize.
      " The old caffe2 mixes Reshape and Resize but this behavior has "
      "been changed. If you find this error, most likely you will need "
      "to change corresponding code from Reshape to Resize.");
  sizes_and_strides_.set_sizes(SymIntArrayRef::fromIntArrayRef(dims));
  empty_tensor_restride(MemoryFormat::Contiguous);
}
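// Illustrative sketch of the Reshape / Resize distinction above (`t` is an
// assumed caffe2-style tensor of shape (2, 6), i.e. numel() == 12):
//
//   t.Reshape(std::vector<int64_t>{3, 4});  // OK: 12 elements either way
//   t.Resize(5, 4);                         // changes numel; may reallocate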
void TensorImpl::FreeMemory() {
  // We'll detach from the old Storage and create a new one
  if (storage_.use_count() != 1 || !storage_.resizable() ||
      !storage_.allocator()) {
    storage_ = Storage::create_legacy(storage_.device());
  } else {
    storage_.reset_legacy();
  }
  storage_offset_ = 0;
}

void TensorImpl::ShareData(const TensorImpl& src) {
  // Right now, we are assuming the device_types are the same, since they are
  // inherently the same in the non-templatized code. We should probably add
  // an assert here which might affect perf a little bit.
  TORCH_CHECK(
      src.numel_ == numel_,
      "Size mismatch - did you call reshape before sharing the data?");
  // It is possible that the source tensor hasn't called mutable_data() yet,
  // in which case ShareData() doesn't make much sense since we don't really
  // know what to share yet.
  // TODO: Add the assert after all uninitialized states are eliminated
  // TORCH_CHECK(src.dtype_initialized(),
  //             "Source tensor doesn't have a data type (did you call "
  //             "mutable_data on the tensor?)");
  if (!src.dtype_initialized()) {
    C10_LOG_EVERY_MS(WARNING, 1000)
        << "Source tensor doesn't have a data type (did you call mutable_data on the tensor?)";
  }
  TORCH_CHECK(
      src.storage_initialized(),
      "Source tensor has no content and has size > 0");
  // Finally, do sharing.
  /* Since we create new Storage whenever we need to change data_type/nbytes
   * this still keeps the original semantics
   */
  storage_ = src.storage();
  data_type_ = src.dtype();
  device_opt_ = src.device_opt();
  storage_offset_ = src.storage_offset();
}

void TensorImpl::ShareExternalPointer(
    DataPtr&& data_ptr,
    const caffe2::TypeMeta data_type,
    size_t size_bytes) {
  TORCH_CHECK(
      data_type != ScalarType::Undefined,
      "To share with a raw external pointer you need to pass in an "
      "initialized data_type(TypeMeta).");
  if (!size_bytes) {
    size_bytes = numel_ * data_type.itemsize();
  }
  if (storage_.unique()) {
    storage_.UniqueStorageShareExternalPointer(std::move(data_ptr), size_bytes);
    data_type_ = data_type;
    device_opt_ = storage_.device();
    storage_offset_ = 0;
  } else {
    // Create a new Storage
    storage_ = Storage(
        Storage::use_byte_size_t(),
        size_bytes,
        std::move(data_ptr),
        /*allocator=*/nullptr,
        /*resizable=*/false);
    data_type_ = data_type;
    device_opt_ = storage_.device();
    storage_offset_ = 0;
  }
}

void TensorImpl::set_sym_sizes_and_strides(
    c10::SymIntArrayRef sizes,
    c10::SymIntArrayRef strides) {
  has_symbolic_sizes_strides_ = true;
  sizes_strides_policy_ = static_cast<uint8_t>(SizesStridesPolicy::CustomSizes);
  sizes_and_strides_.set_sizes(sizes);
  sizes_and_strides_.set_strides(strides);
}

namespace impl {

namespace {
AutogradMetaFactory* meta_factory = nullptr;
} // namespace

void SetAutogradMetaFactory(AutogradMetaFactory* factory) {
  meta_factory = factory;
}

AutogradMetaFactory* GetAutogradMetaFactory() {
  TORCH_CHECK(
      meta_factory,
      "Support for autograd has not been loaded; have you linked against libtorch.so?")
  return meta_factory;
}

} // namespace impl

} // namespace c10