#include "caffe2/core/tensor.h" #include "caffe2/core/tensor_int8.h" #include "caffe2/core/blob_stats.h" #if defined(EXPOSE_C2_OPS) || \ !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE) #include #include "ATen/core/Tensor.h" #endif namespace caffe2 { CAFFE_DEFINE_KNOWN_TYPE(Tensor, Caffe2Tensor); TensorPrinter::TensorPrinter( // NOLINTNEXTLINE(modernize-pass-by-value) const std::string& tensor_name, const std::string& file_name, int limit) : to_file_(!file_name.empty()), limit_(limit ? limit : k_limit_default_), tensor_name_(tensor_name) { if (to_file_) { // We will output to file instead of printing on screen. // We will write each individual tensor to its individual file. // NOLINTNEXTLINE(modernize-make-unique) log_file_.reset(new std::ofstream( file_name, std::ofstream::out | std::ofstream::trunc)); CAFFE_ENFORCE( log_file_->good(), "Failed to open TensorPrinter file ", file_name, ". rdstate() = ", log_file_->rdstate()); } } // NOLINTNEXTLINE(bugprone-exception-escape) TensorPrinter::~TensorPrinter() { if (log_file_.get()) { log_file_->close(); } } void TensorPrinter::PrintMeta(const Tensor& tensor) { if (to_file_) { (*log_file_) << MetaStr(tensor) << std::endl; } else { LOG(INFO) << MetaStr(tensor); } } std::string TensorPrinter::MetaStr(const Tensor& tensor) { std::stringstream meta_stream; meta_stream << "Tensor " << tensor_name_ << " of type " << tensor.dtype().name() << ". Dims: ("; for (const auto dim : tensor.sizes()) { meta_stream << dim << ","; } meta_stream << "): "; return meta_stream.str(); } TypeMeta GetTensorType(const void* c) { const Tensor* tc = static_cast(c); return tc->dtype(); } TypeMeta GetInt8TensorType(const void* c) { const int8::Int8TensorCPU* int8_tensor = static_cast(c); return (int8_tensor->t).dtype(); } // TODO(jerryzh): Remove static CaffeMap type_call_registry_{ {TypeMeta::Id(), GetTensorType}, {TypeMeta::Id(), GetInt8TensorType}, }; TypeCall GetTypeCallFunction(TypeIdentifier id) { auto f = type_call_registry_.find(id); if (f == type_call_registry_.end()) { return nullptr; } return f->second; } void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c) { type_call_registry_[id] = c; } int GetGPUIDForPointer(const void* ptr); vector GetTensorInfo(const void* c, size_t* capacity, DeviceOption* device) { CHECK(capacity); const Tensor* tc = static_cast(c); CHECK(tc); // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage) CHECK(tc->unsafeGetTensorImpl()); CHECK(tc->unsafeGetTensorImpl()->storage().unsafeGetStorageImpl()); *capacity = tc->storage().nbytes(); ExtractDeviceOption(device, tc->GetDevice()); return tc->sizes().vec(); } vector GetInt8TensorInfo(const void* c, size_t* capacity, DeviceOption* device) { const int8::Int8TensorCPU* int8_tensor = static_cast(c); return GetTensorInfo(&(int8_tensor->t), capacity, device); } // since we only have one tensor, probably need to remove this at some point? 
// since we only have one tensor, probably need to remove this at some point?
static CaffeMap<TypeIdentifier, TensorInfoCall> tensor_info_call_registry_{
    {TypeMeta::Id<Tensor>(), GetTensorInfo},
    {TypeMeta::Id<int8::Int8TensorCPU>(), GetInt8TensorInfo},
};

// TODO: Remove this code in a separate diff, since we only have one
// GetTensorInfo function now
TensorInfoCall GetTensorInfoFunction(TypeIdentifier id) {
  auto f = tensor_info_call_registry_.find(id);
  if (f == tensor_info_call_registry_.end()) {
    return nullptr;
  }
  return f->second;
}

void RegisterTensorInfoFunction(TypeIdentifier id, TensorInfoCall c) {
  tensor_info_call_registry_[id] = c;
}

void TensorVectorResize(
    std::vector<Tensor>& tensors,
    int size,
    DeviceType type) {
  tensors.reserve(size);
  for (auto i = 0; i < size; ++i) {
    tensors.emplace_back(type);
  }
}

Tensor empty(at::IntArrayRef dims, at::TensorOptions options) {
  // TODO: merge this with at::empty after Tensor is merged
  auto tensor = Tensor(dims, options.device());
  tensor.raw_mutable_data(options.dtype());
  return tensor;
}

void ReinitializeTensor(
    Tensor* tensor,
    at::IntArrayRef dims,
    at::TensorOptions options) {
  CAFFE_ENFORCE(options.device_opt() != c10::nullopt);
  if (*tensor) {
    // Note: we don't compare device_id here because of the purpose of
    // ReinitializeTensor: https://github.com/pytorch/pytorch/pull/13147
    // In the original code, we don't have device_id defined, therefore, we
    // should not include device_id in the comparison
    if (tensor->GetDeviceType() == options.device().type()) {
      if (tensor->sizes() != dims) {
        // Resize when the dims don't match
        tensor->Resize(dims);
      }
      if (tensor->dtype() == options.dtype()) {
        tensor->raw_mutable_data();
      } else {
        // This C10 logging API is not thread-safe and should not be called
        // here; it can lead to memory corruption in glog.
        // C10_LOG_FIRST_N(WARNING, 1)
        //     << "Changing the data type of Tensor is discouraged."
        //     << " Attempt to change data type from: " << tensor->dtype()
        //     << " to: " << options.dtype();
        // Create a new Tensor when the data type doesn't match.
        *tensor = caffe2::empty(dims, options);
      }
      return;
    }
    // Create a new Tensor when the device doesn't match.
  }

  VLOG(1) << "Create new mutable object " << TypeMeta::TypeName<Tensor>()
          << " dims: " << dims;
  *tensor = caffe2::empty(dims, options);
}

void ReinitializeAndCopyFrom(
    Tensor* t,
    at::TensorOptions options,
    const Tensor& src,
    bool async) {
  auto device_type = options.device().type();
  CAFFE_ENFORCE(t != nullptr, "Target tensor ptr is null.");
  if (!*t || device_type != t->GetDeviceType()) {
    *t = Tensor(device_type);
  }
  CAFFE_ENFORCE(
      !t->dtype_initialized() || t->dtype() == src.dtype(),
      "We don't allow a change of data type in ReinitializeAndCopyFrom. "
      "Attempt to change from: ",
      t->dtype(),
      " to: ",
      src.dtype());
  t->CopyFrom(src, async);
}

void Tensor::enforce_invariants() {
  if (impl_.get() == nullptr) {
    throw std::runtime_error("TensorImpl with nullptr is not supported");
  }
  // TODO: only check `!impl_->requires_grad()` after Variable and Tensor are
  // merged
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
  CAFFE_ENFORCE(
      !(impl_->requires_grad() && at::GradMode::is_enabled()),
      "Caffe2 tensor wrapper doesn't support autograd variables that require grad");
#endif
  CAFFE_ENFORCE_EQ(
      impl_->layout(),
      at::kStrided,
      "Caffe2 tensor wrapper supports only regular non-sparse tensors");
  CAFFE_ENFORCE(
      impl_->is_contiguous(),
      "Caffe2 tensor wrapper supports only contiguous tensors");
}

void Tensor::CopyFrom(const Tensor& src, bool async) {
  // TODO: only check `!impl_->requires_grad()` after Variable and Tensor are
  // merged
#if defined(EXPOSE_C2_OPS) || \
    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
  AT_ASSERT(!(impl_->requires_grad() && at::GradMode::is_enabled()));
#endif
  AT_ASSERTM(
      src.impl_->is_contiguous(),
      "Right now only copy of contiguous source Tensor is supported.");
  AT_ASSERTM(
      src.impl_->storage_initialized(),
      "Cannot copy from an uninitialized Tensor");

  if (src.impl_.get() == impl_.get()) {
    return;
  }

  // Test if we need to allocate a new storage.
  // Uninitialized storages are guaranteed to be uniquely owned,
  // so we don't need to swap in the dst case.
  // If the dtype changed, we need to reallocate storage.
  if (impl_->dtype() != src.impl_->dtype()) {
    // NB: copy preserves device_type.
    // This storage will get initialized by the mutable_data call below.
    impl_->set_storage_and_dtype(
        at::Storage::create_legacy(impl_->device_type()), src.impl_->dtype());
  }
  impl_->Resize(src.impl_->sizes());

  if (impl_->numel() > 0) {
    if (impl_->dtype().copy()) {
      AT_ASSERTM(
          impl_->device_type() == ::at::DeviceType::CPU,
          "In CopyFrom source and dest tensors must both be CPU for "
          "non-POD copy, but dest tensor was ",
          impl_->device_type());
      AT_ASSERTM(
          src.impl_->device_type() == ::at::DeviceType::CPU,
          "In CopyFrom source and dest tensors must both be CPU for "
          "non-POD copy, but src tensor was ",
          src.impl_->device_type());
      impl_->dtype().copy()(
          src.impl_->data(),
          impl_->raw_mutable_data(impl_->dtype()),
          impl_->numel());
    } else {
      // The following copy uses the current (thread local) stream for copying
      // and also takes the GPU id from the device() field passed in.
      //
      // TODO: Potentially more enforcements are necessary to avoid accidental
      // switch to sync copy if the currently set device is wrong.
      //
      // Specifically, we might need to switch to a different context device
      // here explicitly to avoid relying on user synchronizing things
      // properly.
      //
      // note: raw_mutable_data initializes device here
      void* new_data = impl_->raw_mutable_data(impl_->dtype());
      at::CopyBytes(
          impl_->numel() * impl_->itemsize(),
          src.impl_->data(),
          src.impl_->device(),
          new_data,
          impl_->device(),
          async);
    }
  }
}

#if defined(EXPOSE_C2_OPS) || \
    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
Tensor::Tensor(at::Tensor tensor) : impl_(tensor.unsafeReleaseIntrusivePtr()) {
  enforce_invariants();
}

Tensor::operator at::Tensor() const& {
  return at::Tensor::wrap_tensor_impl(impl_);
}

Tensor::operator at::Tensor() && {
  return at::Tensor::wrap_tensor_impl(std::move(impl_));
}
#endif

namespace {

struct TensorStatGetter : BlobStatGetter {
  size_t sizeBytes(const Blob& blob) const override {
    const auto& tensor = blob.Get<Tensor>();
    auto nbytes = tensor.nbytes();
    if (nbytes > 0 && tensor.IsType<std::string>()) {
      const auto* data = tensor.data<std::string>();
      for (int i = 0; i < tensor.numel(); ++i) {
        nbytes += data[i].size();
      }
    }
    return nbytes;
  }
};
REGISTER_BLOB_STAT_GETTER(Tensor, TensorStatGetter);
} // namespace

} // namespace caffe2
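// A round-trip sketch (illustrative only, not compiled here) for the
// at::Tensor conversions defined above, assuming a build where they are
// exposed (EXPOSE_C2_OPS, or neither CAFFE2_IS_XPLAT_BUILD nor C10_MOBILE):
//
//   at::Tensor at_t = at::empty({2, 3}, at::kFloat);
//   caffe2::Tensor c2_t(at_t);  // adopts the same TensorImpl, no copy
//   at::Tensor back = c2_t;     // wraps the shared impl again, no copy
//
// enforce_invariants() rejects wrappers around sparse or non-contiguous
// tensors and (outside mobile/xplat builds) autograd variables that
// require grad.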