diff --git a/binaries/benchmark_helper.cc b/binaries/benchmark_helper.cc
index 001c8e965f6..f481a6292c7 100644
--- a/binaries/benchmark_helper.cc
+++ b/binaries/benchmark_helper.cc
@@ -163,7 +163,7 @@ void loadInput(
       CAFFE_THROW("Not support GPU on mobile.");
 #endif
     } else {
-      caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU);
+      caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
       CHECK_NOTNULL(tensor);
       tensor->Resize(input_dims);
       if (input_type_list[i] == "uint8_t") {
@@ -200,7 +200,7 @@ void fillInputBlob(
     int protos_size = tensor_kv.second.protos_size();
     caffe2::TensorProto* tensor_proto =
         tensor_kv.second.mutable_protos(iteration % protos_size);
-    caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU);
+    caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
     if (tensor_proto->data_type() == caffe2::TensorProto::STRING) {
       int total_size = tensor_proto->string_data_size();
       for (size_t i = 0; i < total_size; i++) {
@@ -298,7 +298,7 @@ void writeOutput(
 #endif
   } else {
     writeTextOutput(
-        workspace->GetBlob(name)->GetMutableTensor(caffe2::CPU),
+        BlobGetMutableTensor(workspace->GetBlob(name), caffe2::CPU),
         output_prefix,
         name);
   }
diff --git a/binaries/speed_benchmark.cc b/binaries/speed_benchmark.cc
index 5914e3f58b4..fd502cf3c07 100644
--- a/binaries/speed_benchmark.cc
+++ b/binaries/speed_benchmark.cc
@@ -137,7 +137,7 @@ int main(int argc, char** argv) {
       if (blob == nullptr) {
         blob = workspace->CreateBlob(input_names[i]);
       }
-      caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU);
+      caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
       CHECK_NOTNULL(tensor);
       tensor->Resize(input_dims);
       if (input_type_list[i] == "uint8_t") {
diff --git a/caffe2/contrib/gloo/common.cc b/caffe2/contrib/gloo/common.cc
index 21ce0343d81..d4929938f19 100644
--- a/caffe2/contrib/gloo/common.cc
+++ b/caffe2/contrib/gloo/common.cc
@@ -12,7 +12,7 @@ namespace caffe2 {
 namespace gloo {
 
 void signalFailure(Blob* status_blob, std::exception& /* unused */) {
-  auto* res = status_blob->GetMutableTensor(CPU);
+  auto* res = BlobGetMutableTensor(status_blob, CPU);
   res->Resize(1);
   res->template mutable_data()[0] = 1;
 }
diff --git a/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc b/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc
index 972d9231dcf..9eee8973142 100644
--- a/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc
+++ b/caffe2/contrib/nervana/nervana_fc_op_gpu_test.cc
@@ -22,7 +22,7 @@ static void AddConstInput(const std::vector& shape, const float value,
   option.set_device_type(PROTO_CUDA);
   CUDAContext context(option);
   Blob* blob = ws->CreateBlob(name);
-  auto* tensor = blob->GetMutableTensor(CUDA);
+  auto* tensor = BlobGetMutableTensor(blob, CUDA);
   tensor->Resize(shape);
   math::Set(tensor->size(), value,
                                 tensor->mutable_data(),
diff --git a/caffe2/contrib/tensorrt/tensorrt_tranformer.cc b/caffe2/contrib/tensorrt/tensorrt_tranformer.cc
index 3612d8b46f1..2dd17e00169 100644
--- a/caffe2/contrib/tensorrt/tensorrt_tranformer.cc
+++ b/caffe2/contrib/tensorrt/tensorrt_tranformer.cc
@@ -95,10 +95,10 @@ void BlobToTensorProto(
   }
 
   // Set values
-  if (blob->IsTensorType(CPU)) {
+  if (BlobIsTensorType(*blob, CPU)) {
     const auto& cpu_tensor = blob->template Get<TensorCPU>();
     CPUTensorToTensorProto(cpu_tensor, t);
-  } else if (blob->IsTensorType(CUDA)) {
+  } else if (BlobIsTensorType(*blob, CUDA)) {
     const auto& cuda_tensor = blob->template Get<TensorCUDA>();
     const auto cpu_tensor = TensorCPU(cuda_tensor, context);
     context->FinishDeviceComputation();
diff --git a/caffe2/core/blob.h b/caffe2/core/blob.h
index 870fc88322b..80470cea443 100644
--- a/caffe2/core/blob.h
+++ b/caffe2/core/blob.h
@@ -6,16 +6,16 @@
 #include
 #include
 #include
-
-#include "caffe2/core/blob_serializer_base.h"
 #include "caffe2/core/common.h"
+
+#include
 #include "caffe2/core/logging.h"
 #include "caffe2/core/tensor.h"
-#include "caffe2/core/typeid.h"
-#include "caffe2/proto/caffe2_pb.h"
 
 namespace caffe2 {
 
+class Tensor;
+
 /**
  * @brief Blob is a general container that hosts a typed pointer.
  *
@@ -50,15 +50,6 @@ class CAFFE2_API Blob final {
     return meta_.Match<T>();
   }
 
-  bool IsTensorType(DeviceType device_type) const {
-    bool is_match = meta_.Match<Tensor>();
-    auto* tensor = static_cast<Tensor*>(pointer_);
-    if (is_match && tensor && tensor->GetDeviceType() == device_type) {
-      return true;
-    }
-    return false;
-  }
-
   /**
    * Returns the meta info of the blob.
    */
@@ -109,9 +100,6 @@ class CAFFE2_API Blob final {
         std::is_default_constructible<T>::value,
         "GetMutable can't be called with non-default-constructible types. "
         "Try using specialized methods");
-    static_assert(
-        !std::is_same<T, Tensor>::value,
-        "Use GetMutableTensor(DeviceType) instead");
     if (IsType<T>()) {
       return static_cast<T*>(pointer_);
     } else {
@@ -129,16 +117,6 @@ class CAFFE2_API Blob final {
     }
   }
 
-  inline Tensor* GetMutableTensor(DeviceType device_type) {
-    if (IsTensorType(device_type)) {
-      return static_cast<Tensor*>(pointer_);
-    } else {
-      VLOG(1) << "Create new mutable object " << TypeMeta::TypeName<Tensor>()
-              << " DeviceType:" << device_type;
-      return Reset(new Tensor(device_type));
-    }
-  }
-
   /**
    * Sets the underlying object to the allocated one. The Blob then takes over
    * the ownership of the passed in pointer. If there is already an object in
@@ -248,5 +226,29 @@ inline void swap(Blob& lhs, Blob& rhs) {
   lhs.swap(rhs);
 }
 
+inline bool BlobIsTensorType(const Blob& blob, DeviceType device_type) {
+  bool is_match = blob.meta().Match<Tensor>();
+  if (!is_match) {
+    return false;
+  }
+  const Tensor* tensor = &blob.Get<Tensor>();
+  return tensor && tensor->GetDeviceType() == device_type;
+}
+
+inline Tensor* BlobGetMutableTensor(Blob* blob, DeviceType device_type) {
+  if (blob->IsType<Tensor>()) {
+    Tensor* tensor = blob->GetMutable<Tensor>();
+    if (tensor->GetDeviceType() == device_type) {
+      return tensor;
+    }
+  }
+
+  // if we're here, then either Blob didn't hold a Tensor
+  // or that Tensor had the wrong DeviceType.
+  VLOG(1) << "Create new mutable object " << TypeMeta::TypeName<Tensor>()
+          << " DeviceType:" << device_type;
+  return blob->Reset(new Tensor(device_type));
+}
+
 } // namespace caffe2
 #endif // CAFFE2_CORE_BLOB_H_
diff --git a/caffe2/core/blob_gpu_test.cc b/caffe2/core/blob_gpu_test.cc
index e8fdf47f69d..55eafdede72 100644
--- a/caffe2/core/blob_gpu_test.cc
+++ b/caffe2/core/blob_gpu_test.cc
@@ -132,7 +132,7 @@ TYPED_TEST(TensorGPUDeathTest, CannotAccessDataWhenEmpty) {
     for (int i = 0; i < 6; ++i) {                                        \
      cpu_tensor.mutable_data<TypeParam>()[i] = static_cast<TypeParam>(i); \
     }                                                                    \
-    blob.GetMutableTensor(CUDA)->CopyFrom(cpu_tensor);                   \
+    BlobGetMutableTensor(&blob, CUDA)->CopyFrom(cpu_tensor);             \
     string serialized = SerializeBlob(blob, "test");                     \
     BlobProto proto;                                                     \
     CAFFE_ENFORCE(proto.ParseFromString(serialized));                    \
@@ -149,7 +149,7 @@ TYPED_TEST(TensorGPUDeathTest, CannotAccessDataWhenEmpty) {
     }                                                                    \
     Blob new_blob;                                                       \
     EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob));             \
-    EXPECT_TRUE(new_blob.IsTensorType(CUDA));                            \
+    EXPECT_TRUE(BlobIsTensorType(new_blob, CUDA));                       \
     Tensor new_cpu_tensor(blob.Get(), CPU);                              \
     EXPECT_EQ(new_cpu_tensor.ndim(), 2);                                 \
     EXPECT_EQ(new_cpu_tensor.dim(0), 2);                                 \
@@ -199,7 +199,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) {
       // Test if the restored blob is still of the same device.
       blob.Reset();
       EXPECT_NO_THROW(DeserializeBlob(serialized, &blob));
-      EXPECT_TRUE(blob.IsTensorType(CUDA));
+      EXPECT_TRUE(BlobIsTensorType(blob, CUDA));
       EXPECT_EQ(GetGPUIDForPointer(blob.Get().data()), gpu_id);
 
       // Test if we force the restored blob on a different device, we
@@ -207,7 +207,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) {
       blob.Reset();
       proto.mutable_tensor()->mutable_device_detail()->set_cuda_gpu_id(0);
       EXPECT_NO_THROW(DeserializeBlob(proto.SerializeAsString(), &blob));
-      EXPECT_TRUE(blob.IsTensorType(CUDA));
+      EXPECT_TRUE(BlobIsTensorType(blob, CUDA));
       EXPECT_EQ(GetGPUIDForPointer(blob.Get().data()), 0);
     }
   }
diff --git a/caffe2/core/blob_serialization.cc b/caffe2/core/blob_serialization.cc
index 7ff5a2b25ea..d4ef19db69c 100644
--- a/caffe2/core/blob_serialization.cc
+++ b/caffe2/core/blob_serialization.cc
@@ -363,7 +363,8 @@ void TensorDeserializer::Deserialize(const BlobProto& blob_proto, Blob* blob) {
   auto tensor_proto = blob_proto.tensor();
   Deserialize(
       tensor_proto,
-      blob->GetMutableTensor(
+      BlobGetMutableTensor(
+          blob,
           static_cast<DeviceType>(tensor_proto.device_detail().device_type())));
 }
diff --git a/caffe2/core/blob_test.cc b/caffe2/core/blob_test.cc
index 24b2a2d0593..bb2f4ba6a91 100644
--- a/caffe2/core/blob_test.cc
+++ b/caffe2/core/blob_test.cc
@@ -86,15 +86,15 @@ TEST(BlobTest, Blob) {
   int* int_unused CAFFE2_UNUSED = blob.GetMutable<int>();
   EXPECT_TRUE(blob.IsType<int>());
   EXPECT_FALSE(blob.IsType<BlobTestFoo>());
-  EXPECT_FALSE(blob.IsTensorType(CPU));
+  EXPECT_FALSE(BlobIsTensorType(blob, CPU));
 
   BlobTestFoo* foo_unused CAFFE2_UNUSED = blob.GetMutable<BlobTestFoo>();
   EXPECT_TRUE(blob.IsType<BlobTestFoo>());
   EXPECT_FALSE(blob.IsType<int>());
-  EXPECT_FALSE(blob.IsTensorType(CPU));
+  EXPECT_FALSE(BlobIsTensorType(blob, CPU));
 
-  Tensor* tensor_unused CAFFE2_UNUSED = blob.GetMutableTensor(CPU);
-  EXPECT_TRUE(blob.IsTensorType(CPU));
+  Tensor* tensor_unused CAFFE2_UNUSED = BlobGetMutableTensor(&blob, CPU);
+  EXPECT_TRUE(BlobIsTensorType(blob, CPU));
   EXPECT_FALSE(blob.IsType<int>());
   EXPECT_FALSE(blob.IsType<BlobTestFoo>());
 }
@@ -600,7 +600,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) {
 
 #define TEST_SERIALIZATION_WITH_TYPE(TypeParam, field_name)              \
   TEST(TensorTest, TensorSerialization_##TypeParam) {                    \
     Blob blob;                                                           \
-    Tensor* tensor = blob.GetMutableTensor(CPU);                         \
+    Tensor* tensor = BlobGetMutableTensor(&blob, CPU);                   \
     tensor->Resize(2, 3);                                                \
     for (int i = 0; i < 6; ++i) {                                        \
       tensor->mutable_data<TypeParam>()[i] = static_cast<TypeParam>(i);  \
     }                                                                    \
@@ -621,7 +621,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) {
     }                                                                    \
     Blob new_blob;                                                       \
     EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob));             \
-    EXPECT_TRUE(new_blob.IsTensorType(CPU));                             \
+    EXPECT_TRUE(BlobIsTensorType(new_blob, CPU));                        \
     const TensorCPU& new_tensor = blob.Get<TensorCPU>();                 \
     EXPECT_EQ(new_tensor.ndim(), 2);                                     \
     EXPECT_EQ(new_tensor.dim(0), 2);                                     \
@@ -634,7 +634,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) {
                                                                          \
   TEST(EmptyTensorTest, TensorSerialization_##TypeParam) {               \
     Blob blob;                                                           \
-    TensorCPU* tensor = blob.GetMutableTensor(CPU);                      \
+    TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU);                \
     tensor->Resize(0, 3);                                                \
     tensor->mutable_data<TypeParam>();                                   \
     string serialized = SerializeBlob(blob, "test");                     \
@@ -650,7 +650,7 @@ TEST(TensorDeathTest, CannotCastDownLargeDims) {
     EXPECT_EQ(tensor_proto.field_name##_size(), 0);                      \
     Blob new_blob;                                                       \
     EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob));             \
-    EXPECT_TRUE(new_blob.IsTensorType(CPU));                             \
+    EXPECT_TRUE(BlobIsTensorType(new_blob, CPU));                        \
     const TensorCPU& new_tensor = blob.Get<TensorCPU>();                 \
     EXPECT_EQ(new_tensor.ndim(), 2);                                     \
     EXPECT_EQ(new_tensor.dim(0), 0);                                     \
@@ -669,7 +669,7 @@ TEST_SERIALIZATION_WITH_TYPE(int64_t, int64_data)
 
 TEST(TensorTest, TensorSerialization_CustomType) {
   Blob blob;
-  TensorCPU* tensor = blob.GetMutableTensor(CPU);
+  TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU);
   tensor->Resize(2, 3);
   for (int i = 0; i < 6; ++i) {
     tensor->mutable_data<BlobTestFoo>()[i].val = i;
@@ -681,7 +681,7 @@ TEST(TensorTest, TensorSerialization_CustomType) {
   EXPECT_EQ(proto.type(), "Tensor");
   Blob new_blob;
   EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob));
-  EXPECT_TRUE(new_blob.IsTensorType(CPU));
+  EXPECT_TRUE(BlobIsTensorType(new_blob, CPU));
   const TensorCPU& new_tensor = blob.Get<TensorCPU>();
   EXPECT_EQ(new_tensor.ndim(), 2);
   EXPECT_EQ(new_tensor.dim(0), 2);
@@ -696,7 +696,7 @@ TEST(TensorTest, TensorSerialization_CustomType) {
 TEST(TensorTest, Half) {
   const int64_t kSize = 3000000;
   Blob blob;
-  TensorCPU* tensor = blob.GetMutableTensor(CPU);
+  TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU);
   tensor->Resize(kSize);
   for (int i = 0; i < tensor->size(); ++i) {
     tensor->mutable_data()[i].x = i % 10000;
@@ -724,7 +724,7 @@ TEST(TensorTest, Half) {
   }
   Blob new_blob;
   EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob));
-  EXPECT_TRUE(new_blob.IsTensorType(CPU));
+  EXPECT_TRUE(BlobIsTensorType(new_blob, CPU));
   const TensorCPU& new_tensor = blob.Get<TensorCPU>();
   EXPECT_EQ(new_tensor.ndim(), 1);
   EXPECT_EQ(new_tensor.dim(0), kSize);
@@ -860,7 +860,7 @@ TYPED_TEST(TypedTensorTest, BigTensorSerialization) {
   {
     VLOG(1) << "Test begin";
     Blob blob;
-    Tensor* tensor = blob.GetMutableTensor(CPU);
+    Tensor* tensor = BlobGetMutableTensor(&blob, CPU);
     VLOG(1) << "Allocating blob";
     tensor->Resize(d1, d2);
     auto mutableData = tensor->mutable_data<TypeParam>();
@@ -903,7 +903,7 @@ TYPED_TEST(TypedTensorTest, BigTensorSerialization) {
     load_op->Run();
     VLOG(1) << "Reading blob from workspace";
     auto new_blob = ws.GetBlob("test");
-    EXPECT_TRUE(new_blob->IsTensorType(CPU));
+    EXPECT_TRUE(BlobIsTensorType(*new_blob, CPU));
     const auto& new_tensor = new_blob->Get<TensorCPU>();
 
     EXPECT_EQ(new_tensor.ndim(), d1);
@@ -1030,7 +1030,7 @@ TEST(CustomChunkSize, BigTensorSerialization) {
   int64_t size = d1 * d2;
 
   Blob blob;
-  TensorCPU* tensor = blob.GetMutableTensor(CPU);
+  TensorCPU* tensor = BlobGetMutableTensor(&blob, CPU);
   tensor->Resize(d1, d2);
   tensor->mutable_data();
 
   std::mutex mutex;
diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h
index 25aa801d265..f5683d14973 100644
--- a/caffe2/core/operator.h
+++ b/caffe2/core/operator.h
@@ -122,7 +122,7 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
     static_assert(
         std::is_same<T, Tensor>::value,
         "Output(int, DeviceType) is only available for Tensor");
-    return outputs_.at(idx)->GetMutableTensor(type);
+    return BlobGetMutableTensor(outputs_.at(idx), type);
   }
 
   template <typename T>
@@ -149,7 +149,7 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
   }
 
   inline bool InputIsTensorType(int idx, DeviceType device_type) {
-    return inputs_.at(idx)->IsTensorType(device_type);
+    return BlobIsTensorType(*inputs_.at(idx), device_type);
   }
 
   template <typename T>
@@ -162,7 +162,7 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
   }
 
   inline bool OutputIsTensorType(int idx, DeviceType type) {
-    return outputs_.at(idx)->IsTensorType(type);
+    return BlobIsTensorType(*outputs_.at(idx), type);
   }
 
   inline int InputSize() const {
diff --git a/caffe2/core/plan_executor.cc b/caffe2/core/plan_executor.cc
index 2c0ad9e7a81..8e48b6b7bea 100644
--- a/caffe2/core/plan_executor.cc
+++ b/caffe2/core/plan_executor.cc
@@ -131,7 +131,8 @@ struct WorkspaceIdInjector {
         "Integer overflow while calculating GLOBAL_WORKSPACE_ID blob");
     int32_t global_ws_id = (seq_++) + (static_cast<int32_t>(node_id) << 16);
     Blob* global_ws_id_blob = workspace->CreateLocalBlob(GLOBAL_WORKSPACE_ID);
-    TensorCPU* global_ws_id_tensor = global_ws_id_blob->GetMutableTensor(CPU);
+    TensorCPU* global_ws_id_tensor =
+        BlobGetMutableTensor(global_ws_id_blob, CPU);
     global_ws_id_tensor->Resize();
     global_ws_id_tensor->template mutable_data<int32_t>()[0] = global_ws_id;
     VLOG(1) << "Adding " << GLOBAL_WORKSPACE_ID << " = " << global_ws_id;
diff --git a/caffe2/core/workspace.h b/caffe2/core/workspace.h
index 11bf9c413c5..cbc58f742c2 100644
--- a/caffe2/core/workspace.h
+++ b/caffe2/core/workspace.h
@@ -151,7 +151,7 @@ class CAFFE2_API Workspace {
       auto* to_blob = CreateBlob(blob);
       CAFFE_ENFORCE(to_blob);
       const auto& from_tensor = from_blob->template Get();
-      auto* to_tensor = to_blob->GetMutableTensor(Context::GetDeviceType());
+      auto* to_tensor = BlobGetMutableTensor(to_blob, Context::GetDeviceType());
       to_tensor->CopyFrom(from_tensor);
     }
   }
diff --git a/caffe2/ideep/operators/concat_split_op.cc b/caffe2/ideep/operators/concat_split_op.cc
index 8d011cd3be8..38ffdc99426 100644
--- a/caffe2/ideep/operators/concat_split_op.cc
+++ b/caffe2/ideep/operators/concat_split_op.cc
@@ -33,8 +33,9 @@ class IDEEPConcatOp final : public IDEEPOperator {
       if (OperatorBase::InputBlob(i).template IsType<itensor>()) {
         inputs.emplace_back(Input(i));
       } else {
-        CAFFE_ENFORCE(OperatorBase::InputBlob(i).IsTensorType(CPU),
-            "Expect cpu tensor if not itensor");
+        CAFFE_ENFORCE(
+            BlobIsTensorType(OperatorBase::InputBlob(i), CPU),
+            "Expect cpu tensor if not itensor");
         auto& tensor_cpu = OperatorBase::Input(i, CPU);
         CAFFE_ENFORCE(tensor_cpu.dims().size() == 0 ||
             tensor_cpu.size_from_dim(0) == 0,
diff --git a/caffe2/ideep/operators/operator_fallback_ideep.h b/caffe2/ideep/operators/operator_fallback_ideep.h
index 08e6de2ae3f..3226a08c4af 100644
--- a/caffe2/ideep/operators/operator_fallback_ideep.h
+++ b/caffe2/ideep/operators/operator_fallback_ideep.h
@@ -89,7 +89,7 @@ class IDEEPFallbackOp final : public IDEEPOperator {
           local_input_blobs_[i]->Reset();
         }
         input_share_[i] = false;
-        auto dtensor = local_input_blobs_[i]->GetMutableTensor(CPU);
+        auto dtensor = BlobGetMutableTensor(local_input_blobs_[i], CPU);
dtensor->Resize(input.get_dims()); if (input.is_public_format()) { dtensor->ShareExternalPointer( @@ -121,7 +121,7 @@ class IDEEPFallbackOp final : public IDEEPOperator { continue; } CAFFE_ENFORCE( - local_output_blobs_[i]->IsTensorType(CPU), + BlobIsTensorType(*local_output_blobs_[i], CPU), "IDEEP fallback op currently does not support non-TensorCPU " "output type who needs copying."); const auto& src = local_output_blobs_[i]->template Get(); @@ -153,7 +153,7 @@ class IDEEPFallbackOp final : public IDEEPOperator { VLOG(2) << "Output " << base_def_.output(i) << " as CPUTensor"; Blob* dst = OperatorBase::OutputBlob(i); dst->Reset(new Tensor(CPU)); - auto dtensor = dst->GetMutableTensor(CPU); + auto dtensor = BlobGetMutableTensor(dst, CPU); dtensor->Resize(src_dims); dtensor->ShareData(src); } diff --git a/caffe2/ideep/operators/utility_ops.cc b/caffe2/ideep/operators/utility_ops.cc index 626568a989b..468a42df1a9 100644 --- a/caffe2/ideep/operators/utility_ops.cc +++ b/caffe2/ideep/operators/utility_ops.cc @@ -31,7 +31,7 @@ class CopyIDEEPToCPUOp final : public IDEEPOperator { USE_IDEEP_DEF_ALIASES(); bool RunOnDevice() override { const auto& input_blob = OperatorBase::InputBlob(0); - if (input_blob.IsTensorType(CPU)) { + if (BlobIsTensorType(input_blob, CPU)) { VLOG(2) << "Directing sharing of TensorCPU"; const auto& X = OperatorBase::Input(0, CPU); auto* Y = OperatorBase::Output(0, CPU); diff --git a/caffe2/mkl/operators/operator_fallback_mkl.h b/caffe2/mkl/operators/operator_fallback_mkl.h index 6d9713b7461..a3135758813 100644 --- a/caffe2/mkl/operators/operator_fallback_mkl.h +++ b/caffe2/mkl/operators/operator_fallback_mkl.h @@ -66,10 +66,10 @@ class MKLFallbackOp final : public Operator { for (int i = 0; i < InputSize(); ++i) { if (OperatorBase::InputIsType>(i)) { OperatorBase::Input>(i).CopyTo( - local_input_blobs_[i]->GetMutableTensor(CPU)); + BlobGetMutableTensor(local_input_blobs_[i], CPU)); } else if (OperatorBase::InputIsType>(i)) { OperatorBase::Input>(i).CopyTo( - local_input_blobs_[i]->GetMutableTensor(CPU)); + BlobGetMutableTensor(local_input_blobs_[i], CPU)); } else { VLOG(1) << "Input " << i << " is not MKLMemory. 
Skipping copy."; // Note(jiayq): This removes a const but conceptually @@ -93,7 +93,7 @@ class MKLFallbackOp final : public Operator { continue; } CAFFE_ENFORCE( - local_output_blobs_[i]->IsTensorType(CPU), + BlobIsTensorType(*local_output_blobs_[i], CPU), "MKL fallback op currently does not support non-TensorCPU " "output type who needs copying."); const auto& src = local_output_blobs_[i]->template Get(); diff --git a/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc b/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc index 111af03f860..06ec2b50acc 100644 --- a/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc +++ b/caffe2/mobile/contrib/arm-compute/operators/copy_op.cc @@ -43,7 +43,7 @@ bool CopyFromGLOp::RunOnDevice() { if (first_run_) { first_run_ = false; for (int i = 0; i < Inputs().size(); ++i) { - auto* Y = OperatorBase::Outputs()[i]->GetMutableTensor(CPU); + auto* Y = BlobGetMutableTensor(OperatorBase::Outputs()[i], CPU); Y->Resize(inputs_[i]->dims()); Y->template mutable_data(); } @@ -54,7 +54,7 @@ bool CopyFromGLOp::RunOnDevice() { // GLTensor auto* X = inputs_[i].get(); X->lazy_allocate(Xblob, second_run_, true); - auto* Y = OperatorBase::Outputs()[i]->GetMutableTensor(CPU); + auto* Y = BlobGetMutableTensor(OperatorBase::Outputs()[i], CPU); Timer timer; timer.Start(); getTensorCPU(*X, *Y); diff --git a/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h b/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h index daa7ef008fc..68f79e84a89 100644 --- a/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h +++ b/caffe2/mobile/contrib/arm-compute/test/gl_operator_test.h @@ -27,7 +27,7 @@ template void PopulateCPUBlob(Workspace *ws, bool random, std::string name, std::vector dims, int val = 1, int dist_shift = 0, float variance = 1) { Blob *blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(dims); T *t_data = tensor->mutable_data(); std::random_device rd; diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm index 52f746f63f3..742f8e48f4e 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm @@ -489,13 +489,13 @@ class MPSCNNPackedInt8BGRANHWCToNCHWCStylizerPreprocessOp final "noise_size", 491 /* prime to avoid artifacts */); // Treaded as half4 in the kernel, so need half4 here. noiseSize = divRoundUp(noiseSize, 4) * 4; - if (!noiseBlob->IsTensorType(CPU) || + if (!BlobIsTensorType(*noiseBlob, CPU) || noiseBlob->Get().size() != noiseSize) { VLOG(2) << "Initializing stylizer with noise: " << noiseSize; caffe2::Timer rt; // Initialize random noise on first use. // Cache it to maintain temporal consistency. 
- auto* t = noiseBlob->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(noiseBlob, CPU); t->Resize(noiseSize); math::RandGaussian( t->size(), diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm index 7216b16611a..7ac629019c5 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm @@ -94,7 +94,7 @@ void testMPSCNN() { Workspace ws; for (auto i = 0; i < N; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); t->Resize(BS, C, H, W); CPUContext ctx; math::RandGaussian( @@ -152,7 +152,7 @@ void testMPSCNN() { Workspace ws; for (auto i = 0; i < N; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); switch (ndim) { case 1: t->Resize(5); @@ -210,7 +210,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNNormalizePlanarYUV Test: "; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batch_size, channels, 8, 13); CPUContext ctx; math::RandGaussian( @@ -218,14 +218,14 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(1, channels); CPUContext ctx; math::RandGaussian( t->size(), 0, 1, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("stddev")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("stddev"), CPU); t->Resize(1, channels); CPUContext ctx; math::RandUniform( @@ -290,7 +290,7 @@ void testMPSCNN() { for (const auto dim : {10, 40}) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, channels, dim, dim); CPUContext ctx; // Too noisy. @@ -299,7 +299,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(channels); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -310,7 +310,7 @@ void testMPSCNN() { // t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(channels); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -321,7 +321,7 @@ void testMPSCNN() { // t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("pw")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("pw"), CPU); t->Resize(prelu == PreluTy::SHARED ? 1 : channels); CPUContext ctx; // Too noisy. @@ -409,7 +409,7 @@ void testMPSCNN() { Workspace ws; const auto channels = array ? 12 : 3; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batch_size, channels, 8, 13); CPUContext ctx; math::RandGaussian( @@ -417,7 +417,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(shared ? 
channels : 1); CPUContext ctx; math::RandGaussian( @@ -480,7 +480,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNSpatialBN Test: " << channels; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batch_size, channels, 8, 13); CPUContext ctx; math::RandGaussian( @@ -488,7 +488,7 @@ void testMPSCNN() { } for (const std::string name : {"scale", "bias", "mean", "var"}) { - auto* t = ws.CreateBlob(name)->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(name), CPU); t->Resize(channels); CPUContext ctx; // High mean to avoid var division by zero. @@ -575,7 +575,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNFC Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, CIn, H, W); CPUContext ctx; math::RandGaussian( @@ -583,7 +583,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(COut, CIn * H * W); CPUContext ctx; math::RandGaussian( @@ -591,7 +591,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(COut); CPUContext ctx; math::RandGaussian( @@ -682,8 +682,8 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNPool Test: " << pool; Workspace ws; { - auto* t = - ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, 8, 8, 13); CPUContext ctx; math::RandGaussian( @@ -784,7 +784,7 @@ void testMPSCNN() { std::vector>{{1, 3, 50, 80}, {1, 12, 50, 80}}) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(dims); CPUContext ctx; math::RandGaussian( @@ -860,7 +860,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNPreprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 8, 13, 4); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -869,7 +869,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 100; @@ -940,7 +940,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNDeprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 3, 8, 24); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -949,7 +949,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 100; @@ -999,7 +999,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNDeprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 3, 1280, 720); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1008,7 +1008,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 30; @@ -1072,7 +1072,8 @@ void testMPSCNN() { LOG(INFO) << 
"MPSCNNConv Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = + BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1080,7 +1081,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(8, 12, kernel_h, kernel_w); CPUContext ctx; math::RandGaussian( @@ -1092,7 +1093,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(8); CPUContext ctx; math::RandGaussian( @@ -1188,7 +1189,7 @@ void testMPSCNN() { Workspace ws; int output_channels = input_channels * channel_multiplier; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, input_channels, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1196,7 +1197,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(output_channels, 1, 3, 3); CPUContext ctx; math::RandGaussian( @@ -1204,7 +1205,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(output_channels); CPUContext ctx; math::RandGaussian( @@ -1275,7 +1276,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNConvRelu Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1283,7 +1284,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(8, 12, 3, 3); CPUContext ctx; math::RandGaussian( @@ -1291,7 +1292,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(8); CPUContext ctx; math::RandGaussian( @@ -1385,7 +1386,7 @@ void testMPSCNN() { LOG(INFO) << "MPSConv Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1393,7 +1394,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(8, 12, 3, 3); CPUContext ctx; math::RandGaussian( @@ -1401,7 +1402,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(8); CPUContext ctx; math::RandGaussian( @@ -1493,7 +1494,7 @@ void testMPSCNN() { LOG(INFO) << "MPSConv Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, C, 12, 16); CPUContext ctx; math::RandGaussian( @@ -1501,7 +1502,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(M, C, K, K); CPUContext ctx; math::RandGaussian( @@ -1509,7 +1510,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(M); CPUContext ctx; 
math::RandGaussian( @@ -1607,7 +1608,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNConv Test - group"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, C, 12, 16); CPUContext ctx; math::RandGaussian( @@ -1615,7 +1616,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(M, C / group, K, K); CPUContext ctx; math::RandGaussian( @@ -1623,7 +1624,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(M); CPUContext ctx; math::RandGaussian( @@ -1726,7 +1727,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNMul Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1734,7 +1735,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(72); CPUContext ctx; math::RandGaussian( @@ -1791,7 +1792,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNSub Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1799,7 +1800,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(72); CPUContext ctx; math::RandGaussian( @@ -1856,7 +1857,7 @@ void testMPSCNN() { LOG(INFO) << "MPSAdd Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1864,7 +1865,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1921,7 +1922,7 @@ void testMPSCNN() { LOG(INFO) << "MPSAdd Test"; Workspace ws; { - auto* t = ws.CreateBlob("X0_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X0_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -1929,7 +1930,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("X1_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X1_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -2011,7 +2012,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNNeuron Test: " << n; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 4, 12, 12); CPUContext ctx; math::RandGaussian( @@ -2065,7 +2066,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNDropout Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 12, 57, 72); CPUContext ctx; math::RandGaussian( @@ -2136,7 +2137,7 @@ void testMPSCNN() { << " - scale: " << scale; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, channels, 40, 40); CPUContext 
ctx; math::RandGaussian( @@ -2144,7 +2145,7 @@ void testMPSCNN() { } { // Use the batch-first encoding (n, [bbox]) - auto* t = ws.CreateBlob("R")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("R"), CPU); t->Resize(6, 5); for (auto i = 0; i < t->dim32(0); ++i) { t->mutable_data()[5 * i + 0] = 0; // batch @@ -2250,14 +2251,14 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNRoIWarp Test 2"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(1, 8, 40, 40); CPUContext ctx; math::RandGaussian( t->size(), 4, 2, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("R")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("R"), CPU); t->Resize(6, 4); for (auto i = 0; i < t->dim32(0); ++i) { t->mutable_data()[4 * i + 0] = (i % 4 + 1) * 1.0 / scale; @@ -2362,7 +2363,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNResizeNearestOp Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, 37, 89); CPUContext ctx; math::RandGaussian( @@ -2497,7 +2498,7 @@ void testMPSCNN() { vector im_info{60, 80, 0.166667}; vector anchors{-38, -16, 53, 31, -120, -120, 135, 135}; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(num_images, A, H, W); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = scores[i]; @@ -2505,7 +2506,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("bbox_delta_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("bbox_delta_cpu"), CPU); t->Resize(num_images, 4 * A, H, W); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = bbx[i]; @@ -2513,7 +2514,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("im_info")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("im_info"), CPU); t->Resize(num_images, 3); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = im_info[i]; @@ -2521,7 +2522,7 @@ void testMPSCNN() { } { - auto* t = ws.CreateBlob("anchors")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("anchors"), CPU); t->Resize(A, 4); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = anchors[i]; @@ -2587,7 +2588,7 @@ void testMPSCNN() { LOG(INFO) << "MPSCNNSoftmax Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); // Only works for spatial dimension of (1, 1) - weird. 
t->Resize(batchSize, 12, 1, 1); CPUContext ctx; @@ -2661,8 +2662,8 @@ void testMPSCNN() { LOG(INFO) << "MPSConvTranspose Test"; Workspace ws; { - auto* t = - ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, inputChannels, 8, 12); CPUContext ctx; math::RandGaussian( @@ -2675,7 +2676,7 @@ void testMPSCNN() { { auto* t = - ws.CreateBlob("W")->GetMutableTensor(CPU); + BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize( inputChannels, outputChannels, @@ -2692,7 +2693,7 @@ void testMPSCNN() { { auto* t = - ws.CreateBlob("b")->GetMutableTensor(CPU); + BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(outputChannels); CPUContext ctx; math::RandGaussian( @@ -2809,7 +2810,7 @@ void testMPSCNN() { << batchSize; Workspace ws; for (auto i = 0; i < numInputs; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); t->Resize(batchSize, array ? (i + 1) * 4 : 4, 10, 10); CPUContext ctx; math::RandGaussian( @@ -2891,7 +2892,7 @@ void testMPSCNN() { } Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(batchSize, inputChannels, 53, 47); CPUContext ctx; math::RandGaussian( @@ -2964,7 +2965,7 @@ void testMPSCNN() { << numInputs << ", " << batchSize; Workspace ws; for (auto i = 0; i < numInputs; ++i) { - auto* t = ws.CreateBlob(cpu(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob(cpu(i)), CPU); t->Resize(batchSize, channelCount, 9, 17); CPUContext ctx; math::RandGaussian( @@ -3336,8 +3337,8 @@ void compareModels(const NetDef& initNet, NetDef predictNet) { Workspace cws; cws.RunNetOnce(initNet); { - auto* t = - cws.CreateBlob(predictNet.external_input(0))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + cws.CreateBlob(predictNet.external_input(0)), CPU); t->Resize(1, 224, 224, 4); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = i % 225; @@ -3348,8 +3349,8 @@ void compareModels(const NetDef& initNet, NetDef predictNet) { Workspace mws; mws.RunNetOnce(initNet); { - auto* t = - mws.CreateBlob(predictNet.external_input(0))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + mws.CreateBlob(predictNet.external_input(0)), CPU); t->Resize(1, 224, 224, 4); for (auto i = 0; i < t->size(); ++i) { t->mutable_data()[i] = i % 225; @@ -3397,16 +3398,16 @@ void verifyRewrite( dumpDef(predictNet); dumpDef(metalPredictNet); -#define RUN_NET(ws, predictNet) \ - ws.RunNetOnce(initNet); \ - { \ - auto* t = \ - ws.CreateBlob(predictNet.external_input(0))->GetMutableTensor(CPU); \ - t->Resize(inputDims); \ - CPUContext ctx; \ - math::RandGaussian( \ - t->size(), 0, 1, t->mutable_data(), &ctx); \ - } \ +#define RUN_NET(ws, predictNet) \ + ws.RunNetOnce(initNet); \ + { \ + auto* t = BlobGetMutableTensor( \ + ws.CreateBlob(predictNet.external_input(0)), CPU); \ + t->Resize(inputDims); \ + CPUContext ctx; \ + math::RandGaussian( \ + t->size(), 0, 1, t->mutable_data(), &ctx); \ + } \ ws.RunNetOnce(predictNet); // initialize diff --git a/caffe2/mobile/contrib/ios/pool_test.cc b/caffe2/mobile/contrib/ios/pool_test.cc index 47fd405eef0..3f78c5d1fcd 100644 --- a/caffe2/mobile/contrib/ios/pool_test.cc +++ b/caffe2/mobile/contrib/ios/pool_test.cc @@ -16,7 +16,7 @@ void AddNoiseInput(const vector& shape, const string& name, Workspace* DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* 
tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/mobile/contrib/ios/resize_test.cc b/caffe2/mobile/contrib/ios/resize_test.cc index 1c08df0f32a..428c395fe44 100644 --- a/caffe2/mobile/contrib/ios/resize_test.cc +++ b/caffe2/mobile/contrib/ios/resize_test.cc @@ -16,7 +16,7 @@ void AddNoiseInput(const vector& shape, const string& name, Workspace* DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/mobile/contrib/nnapi/nnapi.cc b/caffe2/mobile/contrib/nnapi/nnapi.cc index 45ea26c44cc..56f1fc28986 100644 --- a/caffe2/mobile/contrib/nnapi/nnapi.cc +++ b/caffe2/mobile/contrib/nnapi/nnapi.cc @@ -679,7 +679,7 @@ void NNApi::init(const TensorVector& inputs, TensorVector* outputs) { output_dims.push_back(dim); } - auto* tensor = ws_.CreateBlob(blob)->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(ws_.CreateBlob(blob), CPU); tensor->Resize(output_dims); outputs->push_back(tensor); diff --git a/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc b/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc index 359e7767746..c14e9ed2637 100644 --- a/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc +++ b/caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + #include "caffe2/core/init.h" #include "caffe2/core/operator.h" #include "caffe2/core/tensor.h" @@ -43,14 +43,14 @@ static double benchmark_conv_caffe2( ws = &localWs; } { - auto* t = ws->CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("W"), CPU); if (group == 1) { t->Resize(K, C, kernel, kernel); } else { @@ -61,7 +61,7 @@ static double benchmark_conv_caffe2( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("B"), CPU); t->Resize(K); CPUContext ctx; math::RandGaussian( @@ -129,14 +129,14 @@ static double benchmark_conv_nnapi( ws = &localWs; } { - auto* t = ws->CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("W"), CPU); if (group > 1) { CAFFE_ENFORCE_EQ(C, group); t->Resize(1, kernel, kernel, C); @@ -148,7 +148,7 @@ static double benchmark_conv_nnapi( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws->CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("B"), CPU); t->Resize(K); CPUContext ctx; math::RandGaussian( @@ -190,7 +190,7 @@ static double benchmark_conv_nnapi( NetDef initNet; NNApi model(initNet, netdef, ws); std::vector inputs, outputs; - inputs.push_back(ws->GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws->GetBlob("X_cpu"), CPU)); CAFFE_ENFORCE(model.run(inputs, &outputs)); for (int i = 0; i < warmup; i++) { @@ -220,14 
+220,14 @@ static double benchmark_conv_nnapi_int8( ws = &localWs; } { - auto* t = ws->CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); for (int i = 0; i < t->size(); i++) { t->mutable_data()[i] = rand() % 10; } } { - auto* t = ws->CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("W"), CPU); if (group > 1) { CAFFE_ENFORCE_EQ(C, group); t->Resize(1, kernel, kernel, C); @@ -243,7 +243,7 @@ static double benchmark_conv_nnapi_int8( // should be of ANEURALNETWORKS_TENSOR_INT32, with zeroPoint of 0 and // bias_scale == input_scale * filter_scale. { - auto* t = ws->CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws->CreateBlob("B"), CPU); t->Resize(K); for (int i = 0; i < t->size(); i++) { t->mutable_data()[i] = rand() % 10; @@ -322,7 +322,7 @@ static double benchmark_conv_nnapi_int8( NetDef initNet; NNApi model(initNet, netdef, ws); std::vector inputs, outputs; - inputs.push_back(ws->GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws->GetBlob("X_cpu"), CPU)); CAFFE_ENFORCE(model.run(inputs, &outputs)); for (int i = 0; i < warmup; i++) { diff --git a/caffe2/mobile/contrib/nnapi/nnapi_test.cc b/caffe2/mobile/contrib/nnapi/nnapi_test.cc index deab1ca7b43..9b4608dc07a 100644 --- a/caffe2/mobile/contrib/nnapi/nnapi_test.cc +++ b/caffe2/mobile/contrib/nnapi/nnapi_test.cc @@ -55,7 +55,7 @@ static void test_relu(int N, int C, int H, int W) { // CPU reference Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( @@ -81,7 +81,7 @@ static void test_relu(int N, int C, int H, int W) { NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -103,21 +103,21 @@ static void test_conv_NHWC( int stride_w) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(K, kernel, kernel, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU); t->Resize(K); CPUContext ctx; math::RandGaussian( @@ -189,7 +189,7 @@ static void test_conv_NHWC( NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -211,21 +211,21 @@ static void test_depthwise_conv_NHWC( int stride_w) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = 
BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(1, kernel, kernel, D); CPUContext ctx; math::RandGaussian( t->size(), 0, 30, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU); t->Resize(D); CPUContext ctx; math::RandGaussian( @@ -406,7 +406,7 @@ static void test_depthwise_conv_NHWC( NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -428,7 +428,7 @@ static void test_pooling( int stride_w) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; math::RandGaussian( @@ -496,7 +496,7 @@ static void test_pooling( NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; @@ -506,7 +506,7 @@ static void test_pooling( static void test_softmax(int N, int C, int H = 1, int W = 1) { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); if (H == 1 && W == 1) { t->Resize(N, C); } else { @@ -538,7 +538,7 @@ static void test_softmax(int N, int C, int H = 1, int W = 1) { NetDef initNet; NNApi model(initNet, netdef, &ws); std::vector inputs, outputs; - inputs.push_back(ws.GetBlob("X_cpu")->GetMutableTensor(CPU)); + inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU)); EXPECT_TRUE(model.run(inputs, &outputs)); const auto& t_nn = *outputs[0]; diff --git a/caffe2/mobile/contrib/opengl/test/opengl_test.cc b/caffe2/mobile/contrib/opengl/test/opengl_test.cc index 9da266c4e85..690a33cb854 100644 --- a/caffe2/mobile/contrib/opengl/test/opengl_test.cc +++ b/caffe2/mobile/contrib/opengl/test/opengl_test.cc @@ -178,7 +178,7 @@ void testOpenGLCopyOps(int N, int C, int H, int W, float error, int tile_x = 1, LOG(INFO) << "OPENGLCopyFrom/To Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -275,7 +275,7 @@ void testOpenGLConv(int N, << " Op: " << glPoolOperationName[poolOp]; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; if (random_input) { @@ -301,7 +301,7 @@ void testOpenGLConv(int N, } if (poolOp != AveragePool && poolOp != MaxPool) { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); if (poolOp != ConvTranspose && poolOp != ConvTransposePRelu && poolOp != ConvTransposeRelu) { t->Resize(K, C, kernel_h, kernel_w); } else { @@ -343,7 +343,7 @@ void testOpenGLConv(int N, // bias { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -367,7 +367,7 @@ void testOpenGLConv(int N, } if (poolOp == ConvPRelu || poolOp == ConvTransposePRelu) { - auto* t = 
ws.CreateBlob("p")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("p"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -532,7 +532,7 @@ void testOpenGLPRelu( << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. @@ -541,7 +541,7 @@ void testOpenGLPRelu( // prelu scale { - auto* t = ws.CreateBlob("p")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("p"), CPU); t->Resize(prelu_size); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -603,7 +603,7 @@ void testOpenGLRelu(int N, int C, int H, int W, int input_tile_x, int input_tile << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. @@ -664,13 +664,13 @@ void testOpenGLAdd(int N, int C, int H, int W, float error = 0.1, int input_tile << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t0 = ws.CreateBlob("X_cpu0")->GetMutableTensor(CPU); + auto* t0 = BlobGetMutableTensor(ws.CreateBlob("X_cpu0"), CPU); t0->Resize(N, C, H, W); CPUContext ctx0; // Too noisy. math::RandGaussian(t0->size(), 0, 30, t0->mutable_data(), &ctx0); - auto* t1 = ws.CreateBlob("X_cpu1")->GetMutableTensor(CPU); + auto* t1 = BlobGetMutableTensor(ws.CreateBlob("X_cpu1"), CPU); t1->Resize(N, C, H, W); CPUContext ctx1; // Too noisy. @@ -750,13 +750,13 @@ void testOpenGLSub(int N, int C, int H, int W, float error = 0.1) { Workspace ws; { - auto* t0 = ws.CreateBlob("X_cpu0")->GetMutableTensor(CPU); + auto* t0 = BlobGetMutableTensor(ws.CreateBlob("X_cpu0"), CPU); t0->Resize(N, C, H, W); CPUContext ctx0; // Too noisy. math::RandGaussian(t0->size(), 0, 30, t0->mutable_data(), &ctx0); - auto* t1 = ws.CreateBlob("X_cpu1")->GetMutableTensor(CPU); + auto* t1 = BlobGetMutableTensor(ws.CreateBlob("X_cpu1"), CPU); t1->Resize(N, C, H, W); CPUContext ctx1; // Too noisy. @@ -814,8 +814,8 @@ void testOpenGLConcat(int N, std::vector Cs, int H, int W, bool tiling = fa << "H: " << H << ", W: " << W; Workspace ws; for (int i = 0; i < Cs.size(); i++) { - auto* t = - ws.CreateBlob("X_cpu" + caffe2::to_string(i))->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor( + ws.CreateBlob("X_cpu" + caffe2::to_string(i)), CPU); t->Resize(N, Cs[i], H, W); CPUContext ctx0; // Too noisy. @@ -891,7 +891,7 @@ void testOpenGLSigmoid(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. 
@@ -942,7 +942,7 @@ void testOpenGLTanh(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 2, t->mutable_data(), &ctx); @@ -992,14 +992,14 @@ void testOpenGLMul(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), -10, 10, t->mutable_data(), &ctx); } { - auto* t = ws.CreateBlob("B")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU); t->Resize(1); CPUContext ctx; math::RandGaussian(t->size(), -10, 10, t->mutable_data(), &ctx); @@ -1060,7 +1060,7 @@ void testOpenGLSoftmax(int N, int D, float error, bool tiled = false) { LOG(INFO) << "OpenGL Softmax Test " << "N: " << N << " D: " << D << " Tiled:" << tiled; Workspace ws; - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); { t->Resize(N, D); CPUContext ctx; @@ -1151,7 +1151,7 @@ void testOpenGLInstanceNorm(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. @@ -1163,7 +1163,7 @@ void testOpenGLInstanceNorm(int N, int C, int H, int W, float error) { // scale { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1172,7 +1172,7 @@ void testOpenGLInstanceNorm(int N, int C, int H, int W, float error) { } // bias { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1254,7 +1254,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { << "C: " << C << ", H: " << H << ", W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; // Too noisy. 
@@ -1266,7 +1266,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { // scale { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1275,7 +1275,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { } // bias { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1284,7 +1284,7 @@ void testOpenGLInstanceNormPRelu(int N, int C, int H, int W, float error) { } // prelu scale { - auto* t = ws.CreateBlob("p")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("p"), CPU); t->Resize(C); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -1385,7 +1385,7 @@ void OpenGL_speedtest(int N, << " C: " << C << " H: " << H << " W: " << W; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; if (random_input) { @@ -1399,7 +1399,7 @@ void OpenGL_speedtest(int N, } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(K, C, kernel_h, kernel_w); CPUContext ctx; if (random_input) { @@ -1413,7 +1413,7 @@ void OpenGL_speedtest(int N, } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -1479,7 +1479,7 @@ void testOpenGLPadImage( { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -1593,7 +1593,7 @@ void testOpenGLResize(int N, { Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; math::RandGaussian(t->size(), 0, 1, t->mutable_data(), &ctx); @@ -1675,7 +1675,7 @@ void testOpenGLPreprocess(int N, int C, int H, int W, float error) { LOG(INFO) << "OpenGL Preprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, H, W, C); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1684,7 +1684,7 @@ void testOpenGLPreprocess(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 100; @@ -1748,7 +1748,7 @@ void testOpenGLDeprocess(int N, int C, int H, int W, float error) { LOG(INFO) << "OpenGLDeprocess Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1757,7 +1757,7 @@ void testOpenGLDeprocess(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(3); CPUContext ctx; t->mutable_data()[0] = 30; @@ -1800,7 +1800,7 @@ void testOpenGLNormPlanarYUV(int N, int C, int H, int W, 
float error) { LOG(INFO) << "OpenGLNormPlanarYUV Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, 3, H, W); CPUContext ctx; for (auto i = 0; i < t->size(); ++i) { @@ -1809,7 +1809,7 @@ void testOpenGLNormPlanarYUV(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("mean")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("mean"), CPU); t->Resize(1, 3); CPUContext ctx; t->mutable_data()[0] = 30; @@ -1818,7 +1818,7 @@ void testOpenGLNormPlanarYUV(int N, int C, int H, int W, float error) { } { - auto* t = ws.CreateBlob("stdev")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("stdev"), CPU); t->Resize(1, 3); CPUContext ctx; t->mutable_data()[0] = 6; @@ -1879,7 +1879,7 @@ void OpenGL_copyops_speedtest(int N, LOG(INFO) << "OpenGL CopyOps Speed Test"; Workspace ws; { - auto* t = ws.CreateBlob("X_cpu")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU); t->Resize(N, C, H, W); CPUContext ctx; if (random_input) { @@ -1893,7 +1893,7 @@ void OpenGL_copyops_speedtest(int N, } { - auto* t = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); t->Resize(K, C, kernel_h, kernel_w); CPUContext ctx; if (random_input) { @@ -1907,7 +1907,7 @@ void OpenGL_copyops_speedtest(int N, } { - auto* t = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); t->Resize(K); CPUContext ctx; if (random_input) { @@ -1990,8 +1990,8 @@ void compareModelsForOpenGL(std::string name, Workspace cws; cws.RunNetOnce(initNet); - auto* t_cpu = cws.CreateBlob(truncatedPredictNet.external_input(0)) - ->GetMutableTensor(CPU); + auto* t_cpu = BlobGetMutableTensor( + cws.CreateBlob(truncatedPredictNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); @@ -2032,8 +2032,8 @@ void compareModelsForOpenGL(std::string name, Workspace mws; mws.RunNetOnce(initNet); - auto* t_gl = mws.CreateBlob(truncatedOpenGLPredictNet.external_input(0)) - ->GetMutableTensor(CPU); + auto* t_gl = BlobGetMutableTensor( + mws.CreateBlob(truncatedOpenGLPredictNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); @@ -2116,7 +2116,7 @@ void compareBatchedToTiledModels(std::string name, tws.RunNetOnce(initNet); auto* t_batch = - tws.CreateBlob(bachedNet.external_input(0))->GetMutableTensor(CPU); + BlobGetMutableTensor(tws.CreateBlob(bachedNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); @@ -2143,7 +2143,7 @@ void compareBatchedToTiledModels(std::string name, bws.RunNetOnce(initNet); auto* t_tiling = - bws.CreateBlob(tiledNet.external_input(0))->GetMutableTensor(CPU); + BlobGetMutableTensor(bws.CreateBlob(tiledNet.external_input(0)), CPU); if (name == "styleTransfer") { CAFFE_ENFORCE_EQ(input_order, "NHWC"); CAFFE_ENFORCE_EQ(input_type, "uint8_t"); diff --git a/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc b/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc index deced719644..cfeed00e8b9 100644 --- a/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc +++ b/caffe2/mobile/contrib/snpe/snpe_op_benchmark.cc @@ -14,7 +14,7 @@ #define POPULATE_DATA(_n, _s, _l) \ do { \ Blob* _blob = ws.CreateBlob((_n)); \ - auto* 
_tensor = _blob->GetMutableTensor(CPU); \ + auto* _tensor = BlobGetMutableTensor(_blob, CPU); \ _tensor->Resize((_s)); \ memcpy(_tensor->mutable_data(), data_##_l, _tensor->nbytes()); \ } while (0) @@ -23,7 +23,7 @@ #define POPULATE_DATA(_n, _s, _l) \ do { \ Blob* _blob = ws.CreateBlob((_n)); \ - auto* _tensor = _blob->GetMutableTensor(CPU); \ + auto* _tensor = BlobGetMutableTensor(_blob, CPU); \ _tensor->Resize((_s)); \ memset(_tensor->mutable_data(), 1, _tensor->nbytes()); \ } while (0) @@ -43,7 +43,7 @@ void AddConstInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set(tensor->size(), value, tensor->mutable_data(), @@ -56,7 +56,7 @@ void AddNoiseInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/mobile/contrib/ulp2/ulp_test.cc b/caffe2/mobile/contrib/ulp2/ulp_test.cc index a1c1af0f6df..6316b05284f 100644 --- a/caffe2/mobile/contrib/ulp2/ulp_test.cc +++ b/caffe2/mobile/contrib/ulp2/ulp_test.cc @@ -289,13 +289,13 @@ void ConvTest2b1b(int IC, int KH, int KW, int H, int W, int OC, int N, ConvArgs def.add_arg()->CopyFrom(MakeArgument("pad_r", args.pad_r)); def.add_arg()->CopyFrom(MakeArgument("pad_t", args.pad_t)); def.add_arg()->CopyFrom(MakeArgument("pad_b", args.pad_b)); - auto* Xws = ws.CreateBlob("X")->GetMutableTensor(CPU); + auto* Xws = BlobGetMutableTensor(ws.CreateBlob("X"), CPU); Xws->ResizeLike(X); Xws->ShareExternalPointer(X.mutable_data(), X.size()); - auto* Wws = ws.CreateBlob("W")->GetMutableTensor(CPU); + auto* Wws = BlobGetMutableTensor(ws.CreateBlob("W"), CPU); Wws->ResizeLike(W_); Wws->ShareExternalPointer(W_.mutable_data(), W_.size()); - auto* bws = ws.CreateBlob("b")->GetMutableTensor(CPU); + auto* bws = BlobGetMutableTensor(ws.CreateBlob("b"), CPU); bws->ResizeLike(bias); bws->ShareExternalPointer(bias.mutable_data(), bias.size()); ws.RunOperatorOnce(def); diff --git a/caffe2/operators/batch_matmul_op_gpu_test.cc b/caffe2/operators/batch_matmul_op_gpu_test.cc index 804296307d6..31e179b3e41 100644 --- a/caffe2/operators/batch_matmul_op_gpu_test.cc +++ b/caffe2/operators/batch_matmul_op_gpu_test.cc @@ -30,7 +30,7 @@ class BatchMatMulOpGPUTest : public testing::Test { const float value, const string& name) { Blob* blob = ws_.CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(dims); math::Set( tensor->size(), diff --git a/caffe2/operators/batch_matmul_op_test.cc b/caffe2/operators/batch_matmul_op_test.cc index 45db7dd5b84..c74829b4f8f 100644 --- a/caffe2/operators/batch_matmul_op_test.cc +++ b/caffe2/operators/batch_matmul_op_test.cc @@ -24,7 +24,7 @@ class BatchMatMulOpTest : public testing::Test { const float value, const string& name) { Blob* blob = ws_.CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(dims); math::Set( tensor->size(), diff --git a/caffe2/operators/boolean_unmask_ops_test.cc b/caffe2/operators/boolean_unmask_ops_test.cc index 8814be17153..b0c5f7dcdff 100644 --- a/caffe2/operators/boolean_unmask_ops_test.cc +++ b/caffe2/operators/boolean_unmask_ops_test.cc @@ -16,7 +16,7 @@ static void AddScalarInput( 
Workspace* ws, bool isEmpty = false) { Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); if (!isEmpty) { tensor->Resize(vector{1}); *(tensor->template mutable_data<T>()) = value; diff --git a/caffe2/operators/conv_op_shared.cc b/caffe2/operators/conv_op_shared.cc index b9f54b6d55b..155b6f0cd24 100644 --- a/caffe2/operators/conv_op_shared.cc +++ b/caffe2/operators/conv_op_shared.cc @@ -27,8 +27,8 @@ void runWithSharedBuffer( auto* mutexPtr = mutexBlob->GetMutable<std::unique_ptr<std::mutex>>(); std::lock_guard<std::mutex> g(**mutexPtr); - auto* buffer = - ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CPU__")->GetMutableTensor(CPU); + auto* buffer = BlobGetMutableTensor( + ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CPU__"), CPU); f(buffer); } } diff --git a/caffe2/operators/conv_op_shared_gpu.cc b/caffe2/operators/conv_op_shared_gpu.cc index f80d15a5d90..c1f37c7f136 100644 --- a/caffe2/operators/conv_op_shared_gpu.cc +++ b/caffe2/operators/conv_op_shared_gpu.cc @@ -20,8 +20,8 @@ void runWithSharedBuffer( auto* mutexPtr = mutexBlob->GetMutable<std::unique_ptr<std::mutex>>(); std::lock_guard<std::mutex> g(**mutexPtr); - auto* buffer = - ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA__")->GetMutableTensor(CUDA); + auto* buffer = BlobGetMutableTensor( + ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA__"), CUDA); f(buffer); } } diff --git a/caffe2/operators/conv_transpose_op_mobile_test.cc b/caffe2/operators/conv_transpose_op_mobile_test.cc index 6eb45eb5f8d..3bc29516643 100644 --- a/caffe2/operators/conv_transpose_op_mobile_test.cc +++ b/caffe2/operators/conv_transpose_op_mobile_test.cc @@ -17,7 +17,7 @@ void AddConstInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set<float, CPUContext>( tensor->size(), value, tensor->template mutable_data<float>(), &context); @@ -29,7 +29,7 @@ void AddNoiseInput(const vector& shape, DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian<float, CPUContext>( diff --git a/caffe2/operators/dataset_ops.cc b/caffe2/operators/dataset_ops.cc index 83294224280..e3c0abe83d8 100644 --- a/caffe2/operators/dataset_ops.cc +++ b/caffe2/operators/dataset_ops.cc @@ -1428,7 +1428,7 @@ class TreeCursorSerializer : public BlobSerializerBase { // serialize offsets as a tensor if (cursor->offsets.size() > 0) { Blob offsets_blob; - auto* offsets = offsets_blob.GetMutableTensor(CPU); + auto* offsets = BlobGetMutableTensor(&offsets_blob, CPU); offsets->Resize(cursor->offsets.size()); std::copy( cursor->offsets.begin(), diff --git a/caffe2/operators/dropout_op_cudnn.cc b/caffe2/operators/dropout_op_cudnn.cc index a68a1263f6f..8a40c731143 100644 --- a/caffe2/operators/dropout_op_cudnn.cc +++ b/caffe2/operators/dropout_op_cudnn.cc @@ -150,7 +150,7 @@ bool CuDNNDropoutOp::DoRunWithType() { // Reshape tensor descriptors if necessary if (X.dims() != cudnn_input_dims_ && !is_test_) { CAFFE_ENFORCE(scratch_blob_); - Tensor* states = scratch_blob_->GetMutableTensor(CUDA); + Tensor* states = BlobGetMutableTensor(scratch_blob_, CUDA); cudnn_input_dims_ = X.dims(); CUDNN_ENFORCE(cudnnSetTensor4dDescriptor( data_desc_, diff --git a/caffe2/operators/elementwise_op_test.h b/caffe2/operators/elementwise_op_test.h index bcd547e28f0..b785d040c8f 100644 --- a/caffe2/operators/elementwise_op_test.h +++ 
b/caffe2/operators/elementwise_op_test.h @@ -19,7 +19,7 @@ void FillTensor( const std::vector& shape, const std::vector& values) { auto* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(Context::GetDeviceType()); + auto* tensor = BlobGetMutableTensor(blob, Context::GetDeviceType()); tensor->Resize(shape); auto* mutable_data = tensor->template mutable_data<O_Type>(); const O_Type* data = reinterpret_cast<const O_Type*>(values.data()); diff --git a/caffe2/operators/generate_proposals_op_test.cc b/caffe2/operators/generate_proposals_op_test.cc index 2b3a033a665..da7fdc65087 100644 --- a/caffe2/operators/generate_proposals_op_test.cc +++ b/caffe2/operators/generate_proposals_op_test.cc @@ -18,7 +18,7 @@ static void AddConstInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set<float, CPUContext>( tensor->size(), value, tensor->template mutable_data<float>(), &context); @@ -34,7 +34,7 @@ static void AddLinSpacedInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); EigenVectorMap<float> tensor_vec( tensor->template mutable_data<float>(), tensor->size()); @@ -51,7 +51,7 @@ static void AddInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); EigenVectorMap<float> tensor_vec( tensor->template mutable_data<float>(), tensor->size()); diff --git a/caffe2/operators/index_ops.cc b/caffe2/operators/index_ops.cc index 241b0ff97c6..2fb8f3b338d 100644 --- a/caffe2/operators/index_ops.cc +++ b/caffe2/operators/index_ops.cc @@ -353,7 +353,7 @@ class IndexSerializer : public BlobSerializerBase { SerializationAcceptor acceptor) override { auto& base = blob.template Get<std::unique_ptr<IndexBase>>(); Blob tensor_blob; - auto* tensor_out = tensor_blob.GetMutableTensor(CPU); + auto* tensor_out = BlobGetMutableTensor(&tensor_blob, CPU); if (base->Type().Match()) { doStore(base, tensor_out); diff --git a/caffe2/operators/onnx_while_op.h b/caffe2/operators/onnx_while_op.h index dbd51039524..7a3c34cfbf7 100644 --- a/caffe2/operators/onnx_while_op.h +++ b/caffe2/operators/onnx_while_op.h @@ -213,23 +213,23 @@ class ONNXWhileOp final : public Operator<Context> { lcd_tensors_.clear(); for (int i = 2; i < body_net_def.external_input_size(); ++i) { Blob* b = loop_ws_->CreateBlob(body_net_def.external_input(i)); - Tensor* t = b->GetMutableTensor(Context::GetDeviceType()); + Tensor* t = BlobGetMutableTensor(b, Context::GetDeviceType()); lcd_tensors_.push_back(t); } // First output is the iteration variable auto* iteration_var_blob = loop_ws_->CreateBlob( body_net_def.external_input(0)); iteration_var_ = - iteration_var_blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(iteration_var_blob, Context::GetDeviceType()); - input_condition_var_ = - loop_ws_->CreateBlob(body_net_def.external_input(1)) - ->GetMutableTensor(Context::GetDeviceType()); + input_condition_var_ = BlobGetMutableTensor( + loop_ws_->CreateBlob(body_net_def.external_input(1)), + Context::GetDeviceType()); auto* condition_var_blob = loop_ws_->CreateBlob(body_net_def.external_output(0)); condition_var_ = - condition_var_blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(condition_var_blob, Context::GetDeviceType()); condition_var_->Resize(1); 
condition_var_->template mutable_data(); diff --git a/caffe2/operators/onnxifi_op.cc b/caffe2/operators/onnxifi_op.cc index d1b0824f1b3..767a37d5fc7 100644 --- a/caffe2/operators/onnxifi_op.cc +++ b/caffe2/operators/onnxifi_op.cc @@ -15,7 +15,7 @@ void BlobToTensorDescriptor( // Memory type // We only allow weights to be CPU tensor for now CAFFE_ENFORCE( - blob->IsTensorType(CPU), + BlobIsTensorType(*blob, CPU), "Initialization blob ", name, " needs to be TensorCPU"); diff --git a/caffe2/operators/operator_fallback_gpu.h b/caffe2/operators/operator_fallback_gpu.h index 8ef39e7c0e7..5b3a38dbfbd 100644 --- a/caffe2/operators/operator_fallback_gpu.h +++ b/caffe2/operators/operator_fallback_gpu.h @@ -65,8 +65,8 @@ class GPUFallbackOpEx final : public Operator { bool need_sync = false; for (int i = 0; i < InputSize(); ++i) { if (this->InputIsTensorType(i, CUDA)) { - local_input_blobs_[i]->GetMutableTensor(CPU)->CopyFrom( - Input(i), &context_); + BlobGetMutableTensor(local_input_blobs_[i], CPU) + ->CopyFrom(Input(i), &context_); need_sync = true; } else { VLOG(1) << "Input " << i << " is not TensorCUDA. Skipping copy."; @@ -95,7 +95,7 @@ class GPUFallbackOpEx final : public Operator { continue; } CAFFE_ENFORCE( - local_output_blobs_[i]->IsTensorType(CPU), + BlobIsTensorType(*local_output_blobs_[i], CPU), "GPU fallback op currently does not support non-TensorCPU " "output type who needs copying."); Output(i)->CopyFrom(local_output_blobs_[i]->template Get()); diff --git a/caffe2/operators/operator_fallback_gpu_test.cc b/caffe2/operators/operator_fallback_gpu_test.cc index 964708bc109..0870a4be2dd 100644 --- a/caffe2/operators/operator_fallback_gpu_test.cc +++ b/caffe2/operators/operator_fallback_gpu_test.cc @@ -40,7 +40,7 @@ TEST(OperatorFallbackTest, IncrementByOneOp) { for (int i = 0; i < 6; ++i) { source_tensor.mutable_data()[i] = i; } - ws.CreateBlob("X")->GetMutableTensor(CPU)->CopyFrom(source_tensor); + BlobGetMutableTensor(ws.CreateBlob("X"), CPU)->CopyFrom(source_tensor); unique_ptr op(CreateOperator(op_def, &ws)); EXPECT_TRUE(op.get() != nullptr); EXPECT_TRUE(op->Run()); @@ -64,7 +64,7 @@ TEST(OperatorFallbackTest, GPUIncrementByOneOp) { for (int i = 0; i < 6; ++i) { source_tensor.mutable_data()[i] = i; } - ws.CreateBlob("X")->GetMutableTensor(CUDA)->CopyFrom(source_tensor); + BlobGetMutableTensor(ws.CreateBlob("X"), CUDA)->CopyFrom(source_tensor); unique_ptr op(CreateOperator(op_def, &ws)); EXPECT_TRUE(op.get() != nullptr); EXPECT_TRUE(op->Run()); diff --git a/caffe2/operators/reshape_op_gpu_test.cc b/caffe2/operators/reshape_op_gpu_test.cc index 3537ab69d05..d4ac325a78b 100644 --- a/caffe2/operators/reshape_op_gpu_test.cc +++ b/caffe2/operators/reshape_op_gpu_test.cc @@ -20,7 +20,7 @@ static void AddConstInput( option.set_device_type(PROTO_CUDA); CUDAContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); diff --git a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h index 98675cea858..63d58f3ccd8 100644 --- a/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h +++ b/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h @@ -43,11 +43,10 @@ class RecurrentNetworkBlobFetcherOp final : public Operator { prefix_ + std::string("_") + blob_name + caffe2::to_string(i); blob_names_vector.push_back(newBlobName); - 
ws_->CreateBlob(newBlobName) - ->GetMutableTensor(CPU) + BlobGetMutableTensor(ws_->CreateBlob(newBlobName), CPU) ->ResizeLike(currentTensor); auto type = Context::GetDeviceType(); - auto* newTensor = ws_->GetBlob(newBlobName)->GetMutableTensor(type); + auto* newTensor = BlobGetMutableTensor(ws_->GetBlob(newBlobName), type); newTensor->CopyFrom(currentTensor); } } diff --git a/caffe2/operators/rnn/recurrent_network_executor.h b/caffe2/operators/rnn/recurrent_network_executor.h index 7e37e562e77..4cb53a6d7d3 100644 --- a/caffe2/operators/rnn/recurrent_network_executor.h +++ b/caffe2/operators/rnn/recurrent_network_executor.h @@ -111,10 +111,10 @@ class RecurrentNetworkExecutorBase { // the forward-only mode. std::string this_timestep_blob = timestep_blob_ + "_rnnexec_t" + caffe2::to_string(t); - ws->CreateBlob(this_timestep_blob)->GetMutableTensor(CPU)->Resize(1); + BlobGetMutableTensor(ws->CreateBlob(this_timestep_blob), CPU)->Resize(1); auto b = ws->GetBlob(this_timestep_blob); CAFFE_ENFORCE(b); - b->GetMutableTensor(CPU)->template mutable_data()[0] = t; + BlobGetMutableTensor(b, CPU)->template mutable_data()[0] = t; // Copy the operators from template for (auto& template_rnn_op : timestep_ops_template_) { diff --git a/caffe2/operators/rnn/recurrent_network_op.h b/caffe2/operators/rnn/recurrent_network_op.h index 2421bc44263..21b3064a6fa 100644 --- a/caffe2/operators/rnn/recurrent_network_op.h +++ b/caffe2/operators/rnn/recurrent_network_op.h @@ -52,10 +52,11 @@ struct CAFFE2_API ScratchWorkspaces { }; inline void UpdateTimestepBlob(Workspace* ws, std::string blob_name, int t) { - ws->CreateBlob(blob_name)->GetMutableTensor(CPU)->Resize(1); + BlobGetMutableTensor(ws->CreateBlob(blob_name), CPU)->Resize(1); auto timestepBlob = ws->GetBlob(blob_name); CAFFE_ENFORCE(timestepBlob); - timestepBlob->GetMutableTensor(CPU)->template mutable_data()[0] = t; + BlobGetMutableTensor(timestepBlob, CPU)->template mutable_data()[0] = + t; } CAFFE2_API std::map GetRecurrentMapping( @@ -71,8 +72,9 @@ void applyOffsetAlias( << " at offset: " << oc.offset; auto srcBlob = ws->GetBlob(oc.src); CAFFE_ENFORCE(srcBlob); - auto* src = srcBlob->GetMutableTensor(Context::GetDeviceType()); - auto* dst = ws->GetBlob(oc.dst)->GetMutableTensor(Context::GetDeviceType()); + auto* src = BlobGetMutableTensor(srcBlob, Context::GetDeviceType()); + auto* dst = + BlobGetMutableTensor(ws->GetBlob(oc.dst), Context::GetDeviceType()); auto timestep = src->size() / src->dim(0); auto dims = src->dims(); const int32_t startDstTimestep = @@ -113,7 +115,7 @@ void initializeRecurrentInput( Context* context) { auto stateBlob = ws->GetBlob(rc.state); CAFFE_ENFORCE(stateBlob); - auto* state = stateBlob->GetMutableTensor(Context::GetDeviceType()); + auto* state = BlobGetMutableTensor(stateBlob, Context::GetDeviceType()); auto inputBlob = ws->GetBlob(rc.input); CAFFE_ENFORCE(inputBlob); @@ -660,7 +662,7 @@ class RecurrentNetworkGradientOp final : public Operator { auto gBlob = sharedWs_->GetBlob(param.grad); CAFFE_ENFORCE(gBlob); - auto* g = gBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(gBlob, Context::GetDeviceType()); g->ResizeLike(p); math::Set( g->size(), @@ -676,7 +678,7 @@ class RecurrentNetworkGradientOp final : public Operator { auto gBlob = sharedWs_->CreateBlob(rg.grad); CAFFE_ENFORCE(gBlob); - auto* g = gBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(gBlob, Context::GetDeviceType()); g->ResizeLike(p); CAFFE_ENFORCE_EQ(g->ndim(), 3); const auto timestep = 
g->size() / g->dim(0); @@ -703,7 +705,7 @@ class RecurrentNetworkGradientOp final : public Operator { << ". Size: " << Input(gradientInputIndex).size(); auto pGradientBlob = sharedWs_->GetBlob(gradientName); CAFFE_ENFORCE(pGradientBlob); - auto* g = pGradientBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(pGradientBlob, Context::GetDeviceType()); g->ResizeLike(Input(gradientInputIndex)); g->template mutable_data(); } @@ -717,7 +719,7 @@ class RecurrentNetworkGradientOp final : public Operator { << rg.lastExternalGrad << " for final time step (sep. blob)"; auto gBlob = sharedWs_->GetBlob(rg.grad); CAFFE_ENFORCE(gBlob); - auto* g = gBlob->GetMutableTensor(Context::GetDeviceType()); + auto* g = BlobGetMutableTensor(gBlob, Context::GetDeviceType()); auto oglastBlob = sharedWs_->GetBlob(rg.lastExternalGrad); CAFFE_ENFORCE(oglastBlob); @@ -779,7 +781,7 @@ class RecurrentNetworkGradientOp final : public Operator { T* output_data = Output(outputIdx)->template mutable_data(); auto pBlob = sharedWs_->GetBlob(recurrentGradients_[i].grad); CAFFE_ENFORCE(pBlob); - auto* p = pBlob->GetMutableTensor(Context::GetDeviceType()); + auto* p = BlobGetMutableTensor(pBlob, Context::GetDeviceType()); if (Input(inputId).ndim() >= 2) { // Gradient states blob should live. And if it gets changed by the diff --git a/caffe2/operators/roi_align_op_gpu_test.cc b/caffe2/operators/roi_align_op_gpu_test.cc index 2647a97d6f0..7257ec44c25 100644 --- a/caffe2/operators/roi_align_op_gpu_test.cc +++ b/caffe2/operators/roi_align_op_gpu_test.cc @@ -18,7 +18,7 @@ void AddConstInput( Context* context, Workspace* ws) { Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(Context::GetDeviceType()); + auto* tensor = BlobGetMutableTensor(blob, Context::GetDeviceType()); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), context); @@ -39,7 +39,7 @@ void AddInput( const string& name, Workspace* ws) { Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); EigenVectorMap tensor_vec( tensor->template mutable_data(), tensor->size()); @@ -57,7 +57,7 @@ void AddInput( tmp_vec.array() = utils::AsEArrXt(values); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->CopyFrom(tmp); } diff --git a/caffe2/operators/string_ops_test.cc b/caffe2/operators/string_ops_test.cc index c9ba13efb50..2092ae804f2 100644 --- a/caffe2/operators/string_ops_test.cc +++ b/caffe2/operators/string_ops_test.cc @@ -9,7 +9,7 @@ class StringJoinOpTest : public testing::Test { public: bool runOp(const TensorCPU& input) { auto* blob = ws_.CreateBlob("X"); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->ResizeLike(input); tensor->ShareData(input); @@ -26,7 +26,7 @@ class StringJoinOpTest : public testing::Test { const std::string* checkAndGetOutput(int outputSize) { const auto* output = ws_.GetBlob("Y"); EXPECT_NE(output, nullptr); - EXPECT_TRUE(output->IsTensorType(CPU)); + EXPECT_TRUE(BlobIsTensorType(*output, CPU)); const auto& outputTensor = output->Get(); EXPECT_EQ(outputTensor.ndim(), 1); EXPECT_EQ(outputTensor.dim(0), outputSize); @@ -42,7 +42,7 @@ TEST_F(StringJoinOpTest, testString1DJoin) { std::vector input = {"a", "xx", "c"}; auto blob = caffe2::make_unique(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = 
BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size()); auto* data = tensor->template mutable_data<std::string>(); for (int i = 0; i < input.size(); ++i) { @@ -62,7 +62,7 @@ TEST_F(StringJoinOpTest, testString2DJoin) { {"dd", "ee", "ff"}}; auto blob = caffe2::make_unique<Blob>(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size(), input[0].size()); auto* data = tensor->template mutable_data<std::string>(); for (int i = 0; i < input.size(); ++i) { @@ -82,7 +82,7 @@ TEST_F(StringJoinOpTest, testFloat1DJoin) { std::vector<float> input = {3.90f, 5.234f, 8.12f}; auto blob = caffe2::make_unique<Blob>(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size()); auto* data = tensor->template mutable_data<float>(); for (int i = 0; i < input.size(); ++i) { @@ -102,7 +102,7 @@ TEST_F(StringJoinOpTest, testFloat2DJoin) { {4.67f, 5.90f, 6.32f}}; auto blob = caffe2::make_unique<Blob>(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size(), input[0].size()); auto* data = tensor->template mutable_data<float>(); for (int i = 0; i < input.size(); ++i) { @@ -122,7 +122,7 @@ TEST_F(StringJoinOpTest, testLong2DJoin) { std::vector<std::vector<int64_t>> input = {{100, 200}, {1000, 2000}}; auto blob = caffe2::make_unique<Blob>(); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob.get(), CPU); tensor->Resize(input.size(), input[0].size()); auto* data = tensor->template mutable_data<int64_t>(); for (int i = 0; i < input.size(); ++i) { diff --git a/caffe2/operators/stylizer_ops.cc b/caffe2/operators/stylizer_ops.cc index a6d395fe9ba..bfc41a46299 100644 --- a/caffe2/operators/stylizer_ops.cc +++ b/caffe2/operators/stylizer_ops.cc @@ -82,10 +82,10 @@ class PackedInt8BGRANHWCToNCHWCStylizerPreprocessOp auto defaultNoiseSize = OperatorBase::GetSingleArgument<int>( "noise_size", 491 /* prime to avoid artifacts */); - if (!noiseBlob->IsTensorType(CPU)) { + if (!BlobIsTensorType(*noiseBlob, CPU)) { // Initialize random noise on first use. // Cache it to maintain temporal consistency. 
- auto* t = noiseBlob->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(noiseBlob, CPU); #if defined(__ARM_NEON__) || defined(__ARM_NEON) // Noise space is larger for vectorized code due to the diff --git a/caffe2/operators/tensor_protos_db_input.h b/caffe2/operators/tensor_protos_db_input.h index cd081bf959e..e9f5b1a8f84 100644 --- a/caffe2/operators/tensor_protos_db_input.h +++ b/caffe2/operators/tensor_protos_db_input.h @@ -56,7 +56,7 @@ bool TensorProtosDBInput::Prefetch() { protos.mutable_protos(i)->clear_device_detail(); } deserializer.Deserialize( - protos.protos(i), prefetched_blobs_[i].GetMutableTensor(CPU)); + protos.protos(i), BlobGetMutableTensor(&prefetched_blobs_[i], CPU)); } } else { vector temp_tensors; @@ -74,11 +74,11 @@ bool TensorProtosDBInput::Prefetch() { vector dims( protos.protos(i).dims().begin(), protos.protos(i).dims().end()); dims.insert(dims.begin(), batch_size_); - prefetched_blobs_[i].GetMutableTensor(CPU)->Resize(dims); + BlobGetMutableTensor(&prefetched_blobs_[i], CPU)->Resize(dims); } } for (int i = 0; i < protos.protos_size(); ++i) { - TensorCPU* dst = prefetched_blobs_[i].GetMutableTensor(CPU); + TensorCPU* dst = BlobGetMutableTensor(&prefetched_blobs_[i], CPU); TensorCPU& src = temp_tensors[i]; if (protos.protos(i).has_device_detail()) { protos.mutable_protos(i)->clear_device_detail(); diff --git a/caffe2/operators/tt_linear_op.h b/caffe2/operators/tt_linear_op.h index 421c26e318b..1a5cdc344ce 100644 --- a/caffe2/operators/tt_linear_op.h +++ b/caffe2/operators/tt_linear_op.h @@ -52,7 +52,7 @@ class TTLinearOp final : public Operator { int cores_idx = 0; // Temporary buffer to facilitate multiplication of TT-cores with input - auto Y_buf = Y_temp_->GetMutableTensor(Context::GetDeviceType()); + auto Y_buf = BlobGetMutableTensor(Y_temp_.get(), Context::GetDeviceType()); Y_buf->ResizeLike(X); Y_buf->CopyFrom(X); diff --git a/caffe2/operators/utility_ops_gpu_test.cc b/caffe2/operators/utility_ops_gpu_test.cc index f500afaf9ed..1099d900cbe 100644 --- a/caffe2/operators/utility_ops_gpu_test.cc +++ b/caffe2/operators/utility_ops_gpu_test.cc @@ -19,7 +19,7 @@ static void AddConstInput( option.set_device_type(PROTO_CUDA); CUDAContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CUDA); + auto* tensor = BlobGetMutableTensor(blob, CUDA); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); diff --git a/caffe2/operators/utility_ops_test.cc b/caffe2/operators/utility_ops_test.cc index 379dd52655c..a3a2a409674 100644 --- a/caffe2/operators/utility_ops_test.cc +++ b/caffe2/operators/utility_ops_test.cc @@ -16,7 +16,7 @@ static void AddConstInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::Set( tensor->size(), value, tensor->template mutable_data(), &context); diff --git a/caffe2/opt/fusion.cc b/caffe2/opt/fusion.cc index fdf5fdc31e1..8c324a97c50 100644 --- a/caffe2/opt/fusion.cc +++ b/caffe2/opt/fusion.cc @@ -44,10 +44,10 @@ bool fuseConvBNHelper(repr::NNModule* nn, caffe2::Workspace* ws) { CAFFE_ENFORCE( bnInputs.size() >= 5, "Invalid batch normalization input size"); -#define EXPOSE_TENSOR_DATA(name, index, inputs) \ - auto name = repr::nn::get(inputs[index]); \ - assert(ws->HasBlob(name->getName()) && "Blob not in workspace"); \ - auto name##Tensor = ws->GetBlob(name->getName())->GetMutableTensor(CPU); \ 
+#define EXPOSE_TENSOR_DATA(name, index, inputs) \ + auto name = repr::nn::get(inputs[index]); \ + assert(ws->HasBlob(name->getName()) && "Blob not in workspace"); \ + auto name##Tensor = BlobGetMutableTensor(ws->GetBlob(name->getName()), CPU); \ auto name##Data = name##Tensor->mutable_data(); EXPOSE_TENSOR_DATA(filter, 1, convInputs); @@ -76,7 +76,7 @@ bool fuseConvBNHelper(repr::NNModule* nn, caffe2::Workspace* ws) { nn->dataFlow.createEdge(convBiasNode, convNode); auto* blob = ws->CreateBlob(convBiasName); - caffe2::TensorCPU* tensor = blob->GetMutableTensor(caffe2::CPU); + caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU); CHECK_NOTNULL(tensor); // Get output channel size_t c = filterTensor->dim32(0); diff --git a/caffe2/opt/onnxifi_transformer.cc b/caffe2/opt/onnxifi_transformer.cc index ce79df56ecb..a048503fea9 100644 --- a/caffe2/opt/onnxifi_transformer.cc +++ b/caffe2/opt/onnxifi_transformer.cc @@ -173,7 +173,7 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOp( // Feed into workspace as CPU Tensors auto* blob = ws->CreateBlob(t.name()); - auto* cpu_tensor = blob->GetMutableTensor(CPU); + auto* cpu_tensor = BlobGetMutableTensor(blob, CPU); std::vector dims; for(const auto& d : t.dims()) { dims.push_back(d); diff --git a/caffe2/predictor/predictor.cc b/caffe2/predictor/predictor.cc index 84dac93753d..7775e697764 100644 --- a/caffe2/predictor/predictor.cc +++ b/caffe2/predictor/predictor.cc @@ -10,14 +10,14 @@ void enforceIsTensor(Workspace* ws, const std::string& name) { auto blob = ws->GetBlob(name); CAFFE_ENFORCE(blob, "Blob does not exist: ", name); CAFFE_ENFORCE( - blob->IsTensorType(CPU), "Blob is not a CPU Tensor: ", name); + BlobIsTensorType(*blob, CPU), "Blob is not a CPU Tensor: ", name); } TensorCPU* getTensor(Workspace* ws, const std::string& name) { enforceIsTensor(ws, name); auto* blob = ws->GetBlob(name); CAFFE_ENFORCE(blob, "Blob: ", name, " does not exist"); - return blob->GetMutableTensor(CPU); + return BlobGetMutableTensor(blob, CPU); } void shareInputTensor( @@ -60,7 +60,7 @@ Predictor::Predictor(PredictorConfig config) : config_(std::move(config)) { for (const auto& name : config_.predict_net->external_input()) { if (!initialized.count(name)) { auto* blob = config_.ws->CreateBlob(name); - blob->GetMutableTensor(CPU); + BlobGetMutableTensor(blob, CPU); } } CAFFE_ENFORCE(config_.ws->CreateNet(config_.predict_net)); diff --git a/caffe2/predictor/predictor_test.cc b/caffe2/predictor/predictor_test.cc index ae4f73e9da0..a0245cd7a86 100644 --- a/caffe2/predictor/predictor_test.cc +++ b/caffe2/predictor/predictor_test.cc @@ -135,7 +135,7 @@ std::unique_ptr randomTensor( const std::vector& dims, CPUContext* ctx) { auto blob = make_unique(); - auto* t = blob->GetMutableTensor(CPU); + auto* t = BlobGetMutableTensor(blob.get(), CPU); t->Resize(dims); math::RandUniform( t->size(), -1.0, 1.0, t->template mutable_data(), ctx); @@ -180,7 +180,7 @@ TEST_F(PredictorTest, SimpleBatchSized) { auto inputData = randomTensor({1, 4}, ctx_.get()); Predictor::TensorList input; input.emplace_back(CPU); - auto tensor = inputData->GetMutableTensor(CPU); + auto tensor = BlobGetMutableTensor(inputData.get(), CPU); input.back().ResizeLike(*tensor); input.back().ShareData(*tensor); Predictor::TensorList output; @@ -196,7 +196,7 @@ TEST_F(PredictorTest, SimpleBatchSizedMapInput) { auto inputData = randomTensor({1, 4}, ctx_.get()); Predictor::TensorMap input; auto iter = input.emplace("data", Tensor(CPU)); - auto tensor = inputData->GetMutableTensor(CPU); + auto tensor = 
BlobGetMutableTensor(inputData.get(), CPU); iter.first->second.ResizeLike(*tensor); iter.first->second.ShareData(*tensor); diff --git a/caffe2/python/pybind_state.cc b/caffe2/python/pybind_state.cc index 81197047102..9a1d715bfdf 100644 --- a/caffe2/python/pybind_state.cc +++ b/caffe2/python/pybind_state.cc @@ -328,7 +328,7 @@ void addObjectMethods(py::module& m) { }) .def( "tensor", - [](Blob* blob) { return py::cast(blob->GetMutableTensor(CPU)); }, + [](Blob* blob) { return py::cast(BlobGetMutableTensor(blob, CPU)); }, py::return_value_policy::reference_internal) .def( "_feed", diff --git a/caffe2/python/pybind_state.h b/caffe2/python/pybind_state.h index 59f39dd3130..dd5d3b9bc18 100644 --- a/caffe2/python/pybind_state.h +++ b/caffe2/python/pybind_state.h @@ -234,7 +234,7 @@ class TensorFeeder : public BlobFeederBase { FeedTensor( option, original_array, - blob->GetMutableTensor(Context::GetDeviceType())); + BlobGetMutableTensor(blob, Context::GetDeviceType())); } }; @@ -366,31 +366,32 @@ class PythonOpBase : public Operator { // make sure output blob is initialized before creating the binding if (forced_cpu_outputs_.count(i)) { - blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(blob, Context::GetDeviceType()); } else { - blob->GetMutableTensor(Context::GetDeviceType()); + BlobGetMutableTensor(blob, Context::GetDeviceType()); } py::object py_obj; if (blob->template IsType()) { if (use_dlpack) { DLPackWrapper wrapper( - blob->GetMutableTensor(Context::GetDeviceType()), cpu_option); + BlobGetMutableTensor(blob, Context::GetDeviceType()), + cpu_option); py_obj = py::cast(wrapper, py::return_value_policy::copy); } else { py_obj = py::cast( - blob->GetMutableTensor(Context::GetDeviceType()), + BlobGetMutableTensor(blob, Context::GetDeviceType()), py::return_value_policy::reference); } } else { if (use_dlpack) { DLPackWrapper wrapper( - blob->GetMutableTensor(Context::GetDeviceType()), + BlobGetMutableTensor(blob, Context::GetDeviceType()), this->device_option()); py_obj = py::cast(wrapper, py::return_value_policy::copy); } else { py_obj = py::cast( - blob->GetMutableTensor(Context::GetDeviceType()), + BlobGetMutableTensor(blob, Context::GetDeviceType()), py::return_value_policy::reference); } } diff --git a/caffe2/python/pybind_state_ideep.cc b/caffe2/python/pybind_state_ideep.cc index ebad6cf8d96..f0307f7b648 100644 --- a/caffe2/python/pybind_state_ideep.cc +++ b/caffe2/python/pybind_state_ideep.cc @@ -163,8 +163,8 @@ public: DeviceOption cpu_option(option); cpu_option.set_device_type(DeviceTypeProto::PROTO_CPU); TensorFeeder cpu_tensor_feeder; - cpu_tensor_feeder.FeedTensor(cpu_option, original_array, - blob->GetMutableTensor(CPU)); + cpu_tensor_feeder.FeedTensor( + cpu_option, original_array, BlobGetMutableTensor(blob, CPU)); } } catch (ideep::error &e) { LOG(ERROR) << "IDEEP error: " << e.message; diff --git a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc index 4ac3524d49d..d102985e2fd 100644 --- a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc +++ b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op_test.cc @@ -19,7 +19,7 @@ void AddNoiseInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/share/contrib/nnpack/conv_op.cc b/caffe2/share/contrib/nnpack/conv_op.cc index 
05c945106c5..f11e05b6739 100644 --- a/caffe2/share/contrib/nnpack/conv_op.cc +++ b/caffe2/share/contrib/nnpack/conv_op.cc @@ -231,11 +231,12 @@ bool NNPACKConvOp::RunOnDeviceWithOrderNCHW() { (transformedFilterSize + sizeof(float) - 1) / sizeof(float); for (auto g = 0; g < group_; g++) { - transformedFilters_[g] = ws_->CreateBlob( - "__transformed_kernel_" + - to_string(__sync_fetch_and_add( - &precomputed_transform_id, 1))) - ->GetMutableTensor(CPU); + transformedFilters_[g] = BlobGetMutableTensor( + ws_->CreateBlob( + "__transformed_kernel_" + + to_string( + __sync_fetch_and_add(&precomputed_transform_id, 1))), + CPU); transformedFilters_[g]->Resize(transformedFilterElements); status = nnp_convolution_inference( diff --git a/caffe2/share/contrib/nnpack/nnpack_test.cc b/caffe2/share/contrib/nnpack/nnpack_test.cc index 2f892118982..10eb6348bec 100644 --- a/caffe2/share/contrib/nnpack/nnpack_test.cc +++ b/caffe2/share/contrib/nnpack/nnpack_test.cc @@ -19,7 +19,7 @@ void AddNoiseInput( DeviceOption option; CPUContext context(option); Blob* blob = ws->CreateBlob(name); - auto* tensor = blob->GetMutableTensor(CPU); + auto* tensor = BlobGetMutableTensor(blob, CPU); tensor->Resize(shape); math::RandGaussian( diff --git a/caffe2/utils/hip/math_blas_hip_test.cc b/caffe2/utils/hip/math_blas_hip_test.cc index 911c2b09868..a5df5900ee2 100644 --- a/caffe2/utils/hip/math_blas_hip_test.cc +++ b/caffe2/utils/hip/math_blas_hip_test.cc @@ -26,13 +26,13 @@ TEST(MathROCBLASTest, GemmNoTransNoTrans) { vector shapeX{5, 10}; vector shapeW{10, 6}; vector shapeY{5, 6}; - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorW = blobW->GetMutableTensor(HIP); + auto* tensorW = BlobGetMutableTensor(blobW, HIP); tensorW->Resize(shapeW); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorX->size(), 50); @@ -126,13 +126,13 @@ TEST(MathROCBLASTest, GemmNoTransTrans) { vector shapeX{5, 10}; vector shapeW{6, 10}; vector shapeY{5, 6}; - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorW = blobW->GetMutableTensor(HIP); + auto* tensorW = BlobGetMutableTensor(blobW, HIP); tensorW->Resize(shapeW); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorX->size(), 50); @@ -225,13 +225,13 @@ TEST(MathROCBLASTest, GemvNoTrans) { vector shapeA{5, 10}; vector shapeX{10}; vector shapeY{5}; - auto* tensorA = blobA->GetMutableTensor(HIP); + auto* tensorA = BlobGetMutableTensor(blobA, HIP); tensorA->Resize(shapeA); - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorA->size(), 50); @@ -315,13 +315,13 @@ TEST(MathROCBLASTest, GemvTrans) { vector 
shapeA{6, 10}; vector shapeX{6}; vector shapeY{10}; - auto* tensorA = blobA->GetMutableTensor(HIP); + auto* tensorA = BlobGetMutableTensor(blobA, HIP); tensorA->Resize(shapeA); - auto* tensorX = blobX->GetMutableTensor(HIP); + auto* tensorX = BlobGetMutableTensor(blobX, HIP); tensorX->Resize(shapeX); - auto* tensorY = blobY->GetMutableTensor(HIP); + auto* tensorY = BlobGetMutableTensor(blobY, HIP); tensorY->Resize(shapeY); - auto* tensorY_host = blobY_host->GetMutableTensor(CPU); + auto* tensorY_host = BlobGetMutableTensor(blobY_host, CPU); tensorY_host->Resize(shapeY); EXPECT_EQ(tensorA->size(), 60); diff --git a/caffe2/utils/math_gpu_test.cc b/caffe2/utils/math_gpu_test.cc index 9be1c3db6c1..4b0247a0786 100644 --- a/caffe2/utils/math_gpu_test.cc +++ b/caffe2/utils/math_gpu_test.cc @@ -41,9 +41,9 @@ void executeGpuBinaryOpTest( Blob* bloby = ws.CreateBlob("Y"); Blob* bloby_host = ws.CreateBlob("Y_host"); - auto* tensorx0 = blobx0->GetMutableTensor(CUDA); - auto* tensorx1 = blobx1->GetMutableTensor(CUDA); - auto* tensory = bloby->GetMutableTensor(CUDA); + auto* tensorx0 = BlobGetMutableTensor(blobx0, CUDA); + auto* tensorx1 = BlobGetMutableTensor(blobx1, CUDA); + auto* tensory = BlobGetMutableTensor(bloby, CUDA); vector shapex0_vector{shapex0}; vector shapex1_vector{shapex1}; @@ -71,7 +71,7 @@ void executeGpuBinaryOpTest( context.FinishDeviceComputation(); // Copy result to CPU so we can inspect it - auto* tensory_host = bloby_host->GetMutableTensor(CPU); + auto* tensory_host = BlobGetMutableTensor(bloby_host, CPU); tensory_host->CopyFrom(*tensory, &context); context.FinishDeviceComputation(); @@ -94,7 +94,7 @@ TEST(MathUtilGPUTest, testAddStripedBatch) { vector shapex{33 * 9, 25}; vector shapey{33, 25}; - auto* tensorx = blobx->GetMutableTensor(CUDA); + auto* tensorx = BlobGetMutableTensor(blobx, CUDA); tensorx->Resize(shapex); int stripe = 33 * 25; vector tot(33, 0.0); @@ -110,7 +110,7 @@ TEST(MathUtilGPUTest, testAddStripedBatch) { } } - auto* tensory = bloby->GetMutableTensor(CUDA); + auto* tensory = BlobGetMutableTensor(bloby, CUDA); tensory->Resize(shapey); math::Set( stripe, 0.0, tensory->mutable_data(), &context); @@ -125,7 +125,7 @@ TEST(MathUtilGPUTest, testAddStripedBatch) { context.FinishDeviceComputation(); // Copy result to CPU so we can inspect it - auto* tensory_host = bloby_host->GetMutableTensor(CPU); + auto* tensory_host = BlobGetMutableTensor(bloby_host, CPU); tensory_host->CopyFrom(*tensory, &context); context.FinishDeviceComputation(); @@ -258,9 +258,9 @@ class GemmBatchedGPUTest Blob* X_blob = ws_.CreateBlob("X"); Blob* W_blob = ws_.CreateBlob("W"); Blob* Y_blob = ws_.CreateBlob("Y"); - X_ = X_blob->GetMutableTensor(CUDA); - W_ = W_blob->GetMutableTensor(CUDA); - Y_ = Y_blob->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(X_blob, CUDA); + W_ = BlobGetMutableTensor(W_blob, CUDA); + Y_ = BlobGetMutableTensor(Y_blob, CUDA); X_->Resize(std::vector{3, 5, 10}); W_->Resize(std::vector{3, 6, 10}); Y_->Resize(std::vector{3, 5, 6}); @@ -381,8 +381,8 @@ class ReduceTensorGPUTest : public testing::Test { cuda_context_ = make_unique(option_); Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_y = ws_.CreateBlob("Y"); - X_ = blob_x->GetMutableTensor(CUDA); - Y_ = blob_y->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + Y_ = BlobGetMutableTensor(blob_y, CUDA); } void SetUpData( @@ -402,7 +402,7 @@ class ReduceTensorGPUTest : public testing::Test { void VerifyResult(const std::vector& expected_output) { Blob* blob_y_host = ws_.CreateBlob("Y_host"); - auto* 
Y_host = blob_y_host->GetMutableTensor(CPU); + auto* Y_host = BlobGetMutableTensor(blob_y_host, CPU); Y_host->CopyFrom(*Y_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); ASSERT_EQ(expected_output.size(), Y_host->size()); @@ -664,8 +664,8 @@ class BroadcastGPUTest : public testing::Test { cuda_context_ = make_unique(option_); Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_y = ws_.CreateBlob("Y"); - X_ = blob_x->GetMutableTensor(CUDA); - Y_ = blob_y->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + Y_ = BlobGetMutableTensor(blob_y, CUDA); } void SetUpData( @@ -681,7 +681,7 @@ class BroadcastGPUTest : public testing::Test { void VerifyResult(const std::vector& expected_output) { Blob* blob_y_host = ws_.CreateBlob("Y_host"); - auto* Y_host = blob_y_host->GetMutableTensor(CPU); + auto* Y_host = BlobGetMutableTensor(blob_y_host, CPU); Y_host->CopyFrom(*Y_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); ASSERT_EQ(expected_output.size(), Y_host->size()); @@ -741,9 +741,9 @@ class MomentsGPUTest : public testing::Test { Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_mean = ws_.CreateBlob("mean"); Blob* blob_variance = ws_.CreateBlob("variance"); - X_ = blob_x->GetMutableTensor(CUDA); - mean_ = blob_mean->GetMutableTensor(CUDA); - variance_ = blob_variance->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + mean_ = BlobGetMutableTensor(blob_mean, CUDA); + variance_ = BlobGetMutableTensor(blob_variance, CUDA); } void SetUpData( @@ -766,10 +766,10 @@ class MomentsGPUTest : public testing::Test { const std::vector& mean_data, const std::vector& variance_data) { Blob* blob_mean_host = ws_.CreateBlob("mean_host"); - auto* mean_host = blob_mean_host->GetMutableTensor(CPU); + auto* mean_host = BlobGetMutableTensor(blob_mean_host, CPU); mean_host->CopyFrom(*mean_, cuda_context_.get()); Blob* blob_variance_host = ws_.CreateBlob("variance_host"); - auto* variance_host = blob_variance_host->GetMutableTensor(CPU); + auto* variance_host = BlobGetMutableTensor(blob_variance_host, CPU); variance_host->CopyFrom(*variance_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); @@ -868,8 +868,8 @@ class TransposeGPUTest : public testing::Test { cuda_context_ = make_unique(option_); Blob* blob_x = ws_.CreateBlob("X"); Blob* blob_y = ws_.CreateBlob("Y"); - X_ = blob_x->GetMutableTensor(CUDA); - Y_ = blob_y->GetMutableTensor(CUDA); + X_ = BlobGetMutableTensor(blob_x, CUDA); + Y_ = BlobGetMutableTensor(blob_y, CUDA); } void SetUpData( @@ -890,7 +890,7 @@ class TransposeGPUTest : public testing::Test { void VerifyResult(const std::vector& expected_output) { Blob* blob_y_host = ws_.CreateBlob("Y_host"); - auto* Y_host = blob_y_host->GetMutableTensor(CPU); + auto* Y_host = BlobGetMutableTensor(blob_y_host, CPU); Y_host->CopyFrom(*Y_, cuda_context_.get()); cuda_context_->FinishDeviceComputation(); ASSERT_EQ(expected_output.size(), Y_host->size());
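
The hunks above apply one mechanical rewrite throughout the tree: the removed Blob member functions GetMutableTensor(DeviceType) and IsTensorType(DeviceType) become the free functions BlobGetMutableTensor(Blob*, DeviceType) and BlobIsTensorType(const Blob&, DeviceType). A minimal before/after sketch of a typical call site (the function, workspace, blob name, and shape are illustrative, not taken from any file in this patch):

    #include "caffe2/core/blob.h"
    #include "caffe2/core/workspace.h"

    using namespace caffe2;

    void example(Workspace& ws) {
      Blob* blob = ws.CreateBlob("X");

      // Before this patch: device-aware accessors were members of Blob.
      //   TensorCPU* t = blob->GetMutableTensor(CPU);
      //   bool on_cpu = blob->IsTensorType(CPU);

      // After this patch: free functions that take the Blob explicitly.
      TensorCPU* t = BlobGetMutableTensor(blob, CPU);
      t->Resize(4, 4);
      t->mutable_data<float>();  // allocation and access are unchanged
      bool on_cpu = BlobIsTensorType(*blob, CPU);
      (void)on_cpu;
    }

Note the argument shapes: BlobGetMutableTensor takes a Blob*, so call sites holding a unique_ptr<Blob> pass blob.get() and a Blob held by value passes &blob (see string_ops_test.cc and dataset_ops.cc above), while BlobIsTensorType takes a const Blob& and call sites therefore dereference with *blob.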