Drop caffe2 core tests and some other stuff (#127089)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/127089 Approved by: https://github.com/Skylion007
2025-12-06 12:20:52 +01:00 · 2024-05-29 17:11:45 +00:00 · 2024-05-29 17:11:45 +00:00 · cc6e72d882
commit cc6e72d882
parent e8e327ba82
29 changed files with 0 additions and 6925 deletions
--- a/caffe2/README.md
+++ b/caffe2/README.md
@ -1,19 +0,0 @@
-# Caffe2
-
-Caffe2 is a lightweight, modular, and scalable deep learning framework. Building on the original [Caffe](http://caffe.berkeleyvision.org), Caffe2 is designed with expression, speed, and modularity in mind.
-
-## Questions and Feedback
-
-Please use GitHub issues (https://github.com/pytorch/pytorch/issues) to ask questions, report bugs, and request new features.
-
-### Further Resources on [Caffe2.ai](http://caffe2.ai)
-
-* [Installation](http://caffe2.ai/docs/getting-started.html)
-* [Learn More](http://caffe2.ai/docs/learn-more.html)
-* [Upgrading to Caffe2](http://caffe2.ai/docs/caffe-migration.html)
-* [Datasets](http://caffe2.ai/docs/datasets.html)
-* [Model Zoo](http://caffe2.ai/docs/zoo.html)
-* [Tutorials](http://caffe2.ai/docs/tutorials.html)
-* [Operators Catalogue](http://caffe2.ai/docs/operators-catalogue.html)
-* [C++ API](http://caffe2.ai/doxygen-c/html/classes.html)
-* [Python API](http://caffe2.ai/doxygen-python/html/namespaces.html)
--- a/caffe2/VERSION_NUMBER
+++ b/caffe2/VERSION_NUMBER
@ -1 +0,0 @@
-0.8.2
--- a/caffe2/core/blob_gpu_test.cc
+++ b/caffe2/core/blob_gpu_test.cc
@ -1,227 +0,0 @@
-#include <iostream>  // NOLINT
-
-#include <gtest/gtest.h>
-#include "caffe2/core/blob.h"
-#include "caffe2/core/blob_serialization.h"
-#include "caffe2/core/common_gpu.h"
-#include "caffe2/core/context_gpu.h"
-#include "caffe2/proto/caffe2_pb.h"
-
-namespace caffe2 {
-namespace {
-
-template <typename T> class TensorGPUTest : public ::testing::Test {};
-template <typename T> class TensorGPUDeathTest : public ::testing::Test {};
-typedef ::testing::Types<char, int, float> TensorTypes;
-TYPED_TEST_CASE(TensorGPUTest, TensorTypes);
-TYPED_TEST_CASE(TensorGPUDeathTest, TensorTypes);
-
-TYPED_TEST(TensorGPUTest, TensorInitializedEmpty) {
-  if (!caffe2::HasCudaGPU()) return;
-  Tensor tensor(CUDA);
-  EXPECT_EQ(tensor.numel(), 0);
-  EXPECT_EQ(tensor.dim(), 1);
-  vector<int> dims(3);
-  dims[0] = 2;
-  dims[1] = 3;
-  dims[2] = 5;
-  tensor.Resize(dims);
-  EXPECT_EQ(tensor.dim(), 3);
-  EXPECT_EQ(tensor.dim32(0), 2);
-  EXPECT_EQ(tensor.dim32(1), 3);
-  EXPECT_EQ(tensor.dim32(2), 5);
-  EXPECT_TRUE(tensor.mutable_data<TypeParam>() != nullptr);
-  EXPECT_TRUE(tensor.data<TypeParam>() != nullptr);
-}
-
-TYPED_TEST(TensorGPUTest, TensorInitializedNonEmpty) {
-  if (!HasCudaGPU()) return;
-  vector<int> dims(3);
-  dims[0] = 2;
-  dims[1] = 3;
-  dims[2] = 5;
-  Tensor tensor(dims, CUDA);
-  EXPECT_EQ(tensor.dim(), 3);
-  EXPECT_EQ(tensor.dim32(0), 2);
-  EXPECT_EQ(tensor.dim32(1), 3);
-  EXPECT_EQ(tensor.dim32(2), 5);
-  EXPECT_TRUE(tensor.mutable_data<TypeParam>() != nullptr);
-  EXPECT_TRUE(tensor.data<TypeParam>() != nullptr);
-  dims[0] = 7;
-  dims[1] = 11;
-  dims[2] = 13;
-  dims.push_back(17);
-  tensor.Resize(dims);
-  EXPECT_EQ(tensor.dim(), 4);
-  EXPECT_EQ(tensor.dim32(0), 7);
-  EXPECT_EQ(tensor.dim32(1), 11);
-  EXPECT_EQ(tensor.dim32(2), 13);
-  EXPECT_EQ(tensor.dim32(3), 17);
-  EXPECT_TRUE(tensor.mutable_data<TypeParam>() != nullptr);
-  EXPECT_TRUE(tensor.data<TypeParam>() != nullptr);
-}
-
-TYPED_TEST(TensorGPUTest, TensorAlias) {
-  if (!HasCudaGPU()) return;
-  vector<int> dims(3);
-  dims[0] = 2;
-  dims[1] = 3;
-  dims[2] = 5;
-  Tensor tensor(dims, CUDA);
-  EXPECT_TRUE(tensor.mutable_data<TypeParam>() != nullptr);
-  Tensor other_tensor = tensor.Alias();
-  EXPECT_TRUE(tensor.data<TypeParam>() != nullptr);
-  EXPECT_TRUE(other_tensor.data<TypeParam>() != nullptr);
-  EXPECT_EQ(tensor.data<TypeParam>(), other_tensor.data<TypeParam>());
-}
-
-TYPED_TEST(TensorGPUTest, TensorAliasCanUseDifferentShapes) {
-  if (!HasCudaGPU()) return;
-  vector<int> dims(3);
-  dims[0] = 2;
-  dims[1] = 3;
-  dims[2] = 5;
-  vector<int> alternate_dims(1);
-  alternate_dims[0] = 2 * 3 * 5;
-  Tensor tensor(dims, CUDA);
-  EXPECT_TRUE(tensor.mutable_data<TypeParam>() != nullptr);
-  Tensor other_tensor = tensor.Alias();
-  other_tensor.Resize(alternate_dims);
-  EXPECT_EQ(other_tensor.dim(), 1);
-  EXPECT_EQ(other_tensor.dim32(0), alternate_dims[0]);
-  EXPECT_TRUE(tensor.data<TypeParam>() != nullptr);
-  EXPECT_TRUE(other_tensor.data<TypeParam>() != nullptr);
-  EXPECT_EQ(tensor.data<TypeParam>(), other_tensor.data<TypeParam>());
-}
-
-TYPED_TEST(TensorGPUTest, NoLongerAliasAfterNumelChanges) {
-  if (!HasCudaGPU()) return;
-  vector<int> dims(3);
-  dims[0] = 2;
-  dims[1] = 3;
-  dims[2] = 5;
-  Tensor tensor(dims, CUDA);
-  EXPECT_TRUE(tensor.mutable_data<TypeParam>() != nullptr);
-  Tensor other_tensor = tensor.Alias();
-  EXPECT_EQ(tensor.data<TypeParam>(), other_tensor.data<TypeParam>());
-  auto* old_pointer = other_tensor.data<TypeParam>();
-
-  dims[0] = 7;
-  tensor.Resize(dims);
-  EXPECT_EQ(old_pointer, other_tensor.data<TypeParam>());
-  EXPECT_NE(old_pointer, tensor.mutable_data<TypeParam>());
-}
-
-TYPED_TEST(TensorGPUDeathTest, CannotAccessDataWhenEmpty) {
-  if (!HasCudaGPU()) return;
-  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  Tensor tensor(CUDA);
-  EXPECT_EQ(tensor.dim(), 1);
-  EXPECT_EQ(tensor.numel(), 0);
-  EXPECT_THROW(tensor.data<TypeParam>(), EnforceNotMet);
-}
-
-#define TEST_SERIALIZATION_GPU_WITH_TYPE(TypeParam, field_name)            \
-  TEST(TensorGPUTest, TensorSerialization_##TypeParam) {                   \
-    if (!HasCudaGPU()) {                                                   \
-      return;                                                              \
-    }                                                                      \
-    Blob blob;                                                             \
-    Tensor cpu_tensor(CPU);                                                \
-    cpu_tensor.Resize(2, 3);                                               \
-    for (int i = 0; i < 6; ++i) {                                          \
-      cpu_tensor.mutable_data<TypeParam>()[i] = static_cast<TypeParam>(i); \
-    }                                                                      \
-    BlobGetMutableTensor(&blob, CUDA)->CopyFrom(cpu_tensor);               \
-    string serialized = SerializeBlob(blob, "test");                       \
-    BlobProto proto;                                                       \
-    CAFFE_ENFORCE(proto.ParseFromString(serialized));                      \
-    EXPECT_EQ(proto.name(), "test");                                       \
-    EXPECT_EQ(proto.type(), "Tensor");                                     \
-    EXPECT_TRUE(proto.has_tensor());                                       \
-    const TensorProto& tensor_proto = proto.tensor();                      \
-    EXPECT_EQ(                                                             \
-        tensor_proto.data_type(),                                          \
-        TypeMetaToDataType(TypeMeta::Make<TypeParam>()));                  \
-    EXPECT_EQ(tensor_proto.field_name##_size(), 6);                        \
-    for (int i = 0; i < 6; ++i) {                                          \
-      EXPECT_EQ(tensor_proto.field_name(i), static_cast<TypeParam>(i));    \
-    }                                                                      \
-    Blob new_blob;                                                         \
-    EXPECT_NO_THROW(DeserializeBlob(serialized, &new_blob));               \
-    EXPECT_TRUE(BlobIsTensorType(new_blob, CUDA));                         \
-    Tensor new_cpu_tensor(blob.Get<Tensor>(), CPU);                        \
-    EXPECT_EQ(new_cpu_tensor.dim(), 2);                                    \
-    EXPECT_EQ(new_cpu_tensor.size(0), 2);                                  \
-    EXPECT_EQ(new_cpu_tensor.size(1), 3);                                  \
-    for (int i = 0; i < 6; ++i) {                                          \
-      EXPECT_EQ(                                                           \
-          cpu_tensor.data<TypeParam>()[i],                                 \
-          new_cpu_tensor.data<TypeParam>()[i]);                            \
-    }                                                                      \
-  }
-
-TEST_SERIALIZATION_GPU_WITH_TYPE(bool, int32_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(double, double_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(float, float_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(int, int32_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(int8_t, int32_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(int16_t, int32_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(uint8_t, int32_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(uint16_t, int32_data)
-TEST_SERIALIZATION_GPU_WITH_TYPE(int64_t, int64_data)
-
-TEST(TensorConstruction, ReinitializeTensorTest) {
-  if (!HasCudaGPU()) return;
-  Tensor x = caffe2::empty({1}, at::dtype<float>().device(CUDA, 0));
-  auto* data_before = x.template mutable_data<float>();
-  // We'll only compare device_type in ReinitializeTensor,
-  // so no tensor reallocation will happen here
-  ReinitializeTensor(&x, {1}, at::dtype<float>().device(CUDA));
-  auto* data_after = x.template mutable_data<float>();
-  EXPECT_EQ(data_before, data_after);
-}
-
-TEST(TensorTest, TensorSerializationMultiDevices) {
-  Blob blob;
-  Tensor tensor(CPU);
-  tensor.Resize(2, 3);
-  for (int i = 0; i < 6; ++i) {
-    tensor.mutable_data<float>()[i] = i;
-  }
-  for (int gpu_id = 0; gpu_id < NumCudaDevices(); ++gpu_id) {
-    CUDAGuard guard(gpu_id);
-    CUDAContext context(gpu_id); // switch to the current gpu
-    blob.Reset(new Tensor(tensor, CUDA));
-    string serialized = SerializeBlob(blob, "test");
-    BlobProto proto;
-    CAFFE_ENFORCE(proto.ParseFromString(serialized));
-    EXPECT_EQ(proto.name(), "test");
-    EXPECT_TRUE(proto.has_tensor());
-    const TensorProto& tensor_proto = proto.tensor();
-    EXPECT_EQ(tensor_proto.data_type(), TensorProto::FLOAT);
-    EXPECT_EQ(tensor_proto.float_data_size(), 6);
-    for (int i = 0; i < 6; ++i) {
-      EXPECT_EQ(tensor_proto.float_data(i), i);
-    }
-    EXPECT_TRUE(tensor_proto.has_device_detail());
-    EXPECT_EQ(tensor_proto.device_detail().device_type(), PROTO_CUDA);
-    EXPECT_EQ(tensor_proto.device_detail().device_id(), gpu_id);
-    // Test if the restored blob is still of the same device.
-    blob.Reset();
-    EXPECT_NO_THROW(DeserializeBlob(serialized, &blob));
-    EXPECT_TRUE(BlobIsTensorType(blob, CUDA));
-    EXPECT_EQ(GetGPUIDForPointer(blob.Get<TensorCUDA>().data<float>()),
-              gpu_id);
-    // Test if we force the restored blob on a different device, we
-    // can still get so.
-    blob.Reset();
-    proto.mutable_tensor()->mutable_device_detail()->set_device_id(0);
-    EXPECT_NO_THROW(DeserializeBlob(proto.SerializeAsString(), &blob));
-    EXPECT_TRUE(BlobIsTensorType(blob, CUDA));
-    EXPECT_EQ(GetGPUIDForPointer(blob.Get<TensorCUDA>().data<float>()), 0);
-  }
-}
-
-}  // namespace
-}  // namespace caffe2
--- a/caffe2/core/blob_test.cc
+++ b/caffe2/core/blob_test.cc
--- a/caffe2/core/context_gpu_test.cc
+++ b/caffe2/core/context_gpu_test.cc
@ -1,161 +0,0 @@
-#include <chrono>
-#include <future>
-#include <random>
-#include <thread>
-#include <array>
-
-#include "caffe2/core/context_gpu.h"
-#include <gtest/gtest.h>
-
-namespace caffe2 {
-
-TEST(CUDATest, HasCudaRuntime) {
-  EXPECT_TRUE(HasCudaRuntime());
-}
-
-TEST(CUDAContextTest, TestAllocDealloc) {
-  if (!HasCudaGPU()) return;
-  CUDAContext context(0);
-  context.SwitchToDevice();
-  auto data = CUDAContext::New(10 * sizeof(float));
-  EXPECT_NE(data.get(), nullptr);
-}
-
-TEST(CUDAContextTest, TestSetGetDeviceWithoutCaffeMode) {
-  // For a while, set full device control to be true.
-  for (int i = 0; i < NumCudaDevices(); ++i) {
-    CaffeCudaSetDevice(i);
-    EXPECT_EQ(CaffeCudaGetDevice(), i);
-  }
-  for (int i = NumCudaDevices() - 1; i >= 0; --i) {
-    CaffeCudaSetDevice(i);
-    EXPECT_EQ(CaffeCudaGetDevice(), i);
-  }
-}
-
-TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
-  if (!HasCudaGPU())
-    return;
-  if (GetCudaMemoryPoolType() == CudaMemoryPoolType::NONE) {
-    LOG(ERROR) << "Choose a memory type that is not none to test memory pool.";
-    return;
-  }
-  const int nbytes = 1048576;
-  for (int i = 0; i < NumCudaDevices(); ++i) {
-    LOG(INFO) << "Device " << i << " of " << NumCudaDevices();
-    CUDAGuard guard(i);
-    auto allocated = CUDAContext::New(nbytes);
-    EXPECT_NE(allocated, nullptr);
-    cudaPointerAttributes attr;
-    CUDA_ENFORCE(cudaPointerGetAttributes(&attr, allocated.get()));
-    EXPECT_EQ(attr.type, cudaMemoryTypeDevice);
-    EXPECT_EQ(attr.device, i);
-    void* prev_allocated = allocated.get();
-    allocated.clear();
-    auto new_allocated = CUDAContext::New(nbytes);
-    // With a pool, the above allocation should yield the same address.
-    EXPECT_EQ(new_allocated.get(), prev_allocated);
-    // But, if we are allocating something larger, we will have a different
-    // chunk of memory.
-    auto larger_allocated = CUDAContext::New(nbytes * 2);
-    EXPECT_NE(larger_allocated.get(), prev_allocated);
-  }
-}
-
-cudaStream_t getStreamForHandle(cublasHandle_t handle) {
-  cudaStream_t stream = nullptr;
-  CUBLAS_ENFORCE(cublasGetStream(handle, &stream));
-  TORCH_CHECK_NOTNULL(stream);
-  return stream;
-}
-
-TEST(CUDAContextTest, TestSameThreadSameObject) {
-  if (!HasCudaGPU()) return;
-  CUDAContext context_a(0);
-  CUDAContext context_b(0);
-  EXPECT_EQ(context_a.cuda_stream(), context_b.cuda_stream());
-  EXPECT_EQ(context_a.cublas_handle(), context_b.cublas_handle());
-  EXPECT_EQ(
-      context_a.cuda_stream(), getStreamForHandle(context_b.cublas_handle()));
-  // CuRAND generators are context-local.
-  EXPECT_NE(context_a.curand_generator(), context_b.curand_generator());
-}
-
-TEST(CUDAContextTest, TestSameThreadTempObject) {
-  if (!HasCudaGPU())
-    return;
-  CUDAContext context_outer(0); // gpu id
-  context_outer.SwitchToDevice();
-
-  if (NumCudaDevices() >= 2) {
-    auto before_stream = context_outer.cuda_stream();
-
-    // try to mess up current device
-    CUDAContext context_different_device(1);
-    context_different_device.SwitchToDevice(10);
-
-    // go back
-    context_outer.SwitchToDevice();
-    EXPECT_EQ(context_outer.cuda_stream(), before_stream);
-
-    // do nothing - infers the current device and stream
-    CUDAContext context_noop;
-    EXPECT_EQ(context_outer.cuda_stream(), before_stream);
-    EXPECT_EQ(context_noop.cuda_stream(), before_stream);
-
-
-    // override stream - the previous context is not valid any more until
-    // SwitchToDevice is called again (needs to be refactored into proper guard)
-    CUDAContext context_override;
-    context_override.SwitchToDevice(1); // logical stream id
-    EXPECT_NE(context_override.cuda_stream(), before_stream);
-    // note, that accessing streams from context_outer and context_noop is not
-    // semantically valid any more
-  }
-}
-
-TEST(CUDAContextTest, TestSameThreadDifferntObjectIfDifferentDevices) {
-  if (NumCudaDevices() > 1) {
-    CUDAContext context_a(0);
-    CUDAContext context_b(1);
-    EXPECT_NE(context_a.cuda_stream(), context_b.cuda_stream());
-    EXPECT_NE(context_a.cublas_handle(), context_b.cublas_handle());
-    EXPECT_NE(
-        context_a.cuda_stream(), getStreamForHandle(context_b.cublas_handle()));
-    EXPECT_NE(context_a.curand_generator(), context_b.curand_generator());
-  }
-}
-
-namespace {
-// A test function to return a stream address from a temp CUDA context. You
-// should not use that stream though, because the actual stream is destroyed
-// after thread exit.
-void TEST_GetStreamAddress(cudaStream_t* ptr) {
-  CUDAContext context(0);
-  context.SwitchToDevice();
-  *ptr = context.cuda_stream();
-  // Sleep for a while so we have concurrent thread executions
-  std::this_thread::sleep_for(std::chrono::seconds(1));
-}
-}  // namespace
-
-TEST(CUDAContextTest, TestDifferntThreadDifferentobject) {
-  if (!HasCudaGPU()) return;
-  std::array<cudaStream_t, 2> temp = {0};
-  // Same thread
-  TEST_GetStreamAddress(&temp[0]);
-  TEST_GetStreamAddress(&temp[1]);
-  EXPECT_TRUE(temp[0] != nullptr);
-  EXPECT_TRUE(temp[1] != nullptr);
-  EXPECT_EQ(temp[0], temp[1]);
-  // Different threads
-  std::thread thread_a(TEST_GetStreamAddress, &temp[0]);
-  std::thread thread_b(TEST_GetStreamAddress, &temp[1]);
-  thread_a.join();
-  thread_b.join();
-  EXPECT_TRUE(temp[0] != nullptr);
-  EXPECT_TRUE(temp[1] != nullptr);
-  EXPECT_NE(temp[0], temp[1]);
-}
-
-}  // namespace caffe2
--- a/caffe2/core/context_test.cc
+++ b/caffe2/core/context_test.cc
@ -1,38 +0,0 @@
-#include <random>
-
-#include <c10/core/alignment.h>
-#include <gtest/gtest.h>
-#include "caffe2/core/context.h"
-#include "caffe2/proto/caffe2_pb.h"
-
-namespace caffe2 {
-
-TEST(CPUContextTest, TestAllocAlignment) {
-  for (int i = 1; i < 10; ++i) {
-    auto data = CPUContext::New(i);
-    EXPECT_EQ((reinterpret_cast<size_t>(data.get()) % gAlignment), 0);
-    // data is freed when out of scope
-  }
-}
-
-TEST(CPUContextTest, TestAllocDealloc) {
-  auto data_ptr = CPUContext::New(10 * sizeof(float));
-  float* data = static_cast<float*>(data_ptr.get());
-  EXPECT_NE(data, nullptr);
-  auto dst_data_ptr = CPUContext::New(10 * sizeof(float));
-  float* dst_data = static_cast<float*>(dst_data_ptr.get());
-  EXPECT_NE(dst_data, nullptr);
-  for (int i = 0; i < 10; ++i) {
-    // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
-    data[i] = i;
-  }
-  DeviceOption option;
-  CPUContext context(option);
-  context.CopyToCPU<float>(10, data, dst_data);
-  for (int i = 0; i < 10; ++i) {
-    EXPECT_FLOAT_EQ(dst_data[i], i);
-  }
-  // data_ptr is freed when out of scope
-}
-
-}  // namespace caffe2
--- a/caffe2/core/event_gpu_test.cc
+++ b/caffe2/core/event_gpu_test.cc
@ -1,50 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/context.h"
-#include "caffe2/core/context_gpu.h"
-#include "caffe2/core/event.h"
-
-namespace caffe2 {
-
-TEST(EventCUDATest, EventBasics) {
-  if (!HasCudaGPU())
-    return;
-  DeviceOption device_cpu;
-  device_cpu.set_device_type(PROTO_CPU);
-  DeviceOption device_cuda;
-  device_cuda.set_device_type(PROTO_CUDA);
-
-  CPUContext context_cpu(device_cpu);
-  CUDAContext context_cuda(device_cuda);
-
-  Event event_cpu(device_cpu);
-  Event event_cuda(device_cuda);
-
-  // CPU context and event interactions
-  context_cpu.Record(&event_cpu);
-  event_cpu.SetFinished();
-  event_cpu.Finish();
-  context_cpu.WaitEvent(event_cpu);
-
-  event_cpu.Reset();
-  event_cpu.Record(CPU, &context_cpu);
-  event_cpu.SetFinished();
-  event_cpu.Wait(CPU, &context_cpu);
-
-  // CUDA context and event interactions
-  context_cuda.SwitchToDevice();
-  context_cuda.Record(&event_cuda);
-  context_cuda.WaitEvent(event_cuda);
-  event_cuda.Finish();
-
-  event_cuda.Reset();
-  event_cuda.Record(CUDA, &context_cuda);
-  event_cuda.Wait(CUDA, &context_cuda);
-
-  // CPU context waiting for CUDA event
-  context_cpu.WaitEvent(event_cuda);
-
-  // CUDA context waiting for CPU event
-  context_cuda.WaitEvent(event_cpu);
-}
-
-} // namespace caffe2
--- a/caffe2/core/event_test.cc
+++ b/caffe2/core/event_test.cc
@ -1,41 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/context.h"
-#include "caffe2/core/event.h"
-
-namespace caffe2 {
-
-TEST(EventCPUTest, EventBasics) {
-  DeviceOption device_option;
-  device_option.set_device_type(PROTO_CPU);
-  Event event(device_option);
-  CPUContext context;
-
-  context.Record(&event);
-  event.SetFinished();
-
-  context.WaitEvent(event);
-  event.Finish();
-
-  event.Reset();
-  event.Record(CPU, &context);
-  event.SetFinished();
-  event.Wait(CPU, &context);
-}
-
-TEST(EventCPUTest, EventErrors) {
-  DeviceOption device_option;
-  device_option.set_device_type(PROTO_CPU);
-  Event event(device_option);
-
-  event.SetFinished();
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  ASSERT_THROW(event.SetFinished("error"), caffe2::EnforceNotMet);
-  ASSERT_EQ(event.ErrorMessage(), "No error");
-
-  event.Reset();
-  event.SetFinished("error 1");
-  event.SetFinished("error 2");
-  ASSERT_EQ(event.ErrorMessage(), "error 1");
-}
-
-} // namespace caffe2
--- a/caffe2/core/graph_test.cc
+++ b/caffe2/core/graph_test.cc
@ -1,200 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/graph.h"
-#include "caffe2/core/net.h"
-#include "caffe2/core/operator.h"
-
-namespace caffe2 {
-
-namespace {
-
-using transform::Graph;
-
-static std::atomic<int> counter;
-
-class GraphDummyOp final : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-  bool Run(int /* unused */) override {
-    counter.fetch_add(1);
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(GraphDummyOp1, GraphDummyOp);
-
-OPERATOR_SCHEMA(GraphDummyOp1)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-REGISTER_CPU_OPERATOR(GraphDummyOp2, GraphDummyOp);
-
-OPERATOR_SCHEMA(GraphDummyOp2)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-REGISTER_CPU_OPERATOR(GraphDummyOp3, GraphDummyOp);
-
-OPERATOR_SCHEMA(GraphDummyOp3)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-// Checks if two netdefs are  in terms of type, input, and output.
-void compare_netdefs(const NetDef& net_a, const NetDef& net_b) {
-  EXPECT_EQ(net_a.op_size(), net_b.op_size());
-  for (int i = 0; i < net_a.op_size(); i++) {
-    EXPECT_EQ(net_a.op(i).type(), net_b.op(i).type());
-    EXPECT_EQ(net_a.op(i).input_size(), net_b.op(i).input_size());
-    for (int j = 0; j < net_a.op(i).input_size(); j++) {
-      EXPECT_EQ(net_a.op(i).input(j), net_b.op(i).input(j));
-    }
-    EXPECT_EQ(net_a.op(i).output_size(), net_b.op(i).output_size());
-    for (int j = 0; j < net_a.op(i).output_size(); j++) {
-      EXPECT_EQ(net_a.op(i).output(j), net_b.op(i).output(j));
-    }
-  }
-}
-
-TEST(GraphTest, TestGenerateGraphChain) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-  AddOp(&netdef, "GraphDummyOp1", {"in"}, {"mid1"});
-  AddOp(&netdef, "GraphDummyOp2", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "GraphDummyOp1", {"mid2"}, {"mid3"});
-  AddOp(&netdef, "GraphDummyOp2", {"mid3"}, {"out"});
-  Graph g(netdef);
-  EXPECT_EQ(g.size(), 4);
-  for (int i = 0; i < 4; i++) {
-    if (i < 3) {
-      EXPECT_EQ(g.node(i).children.size(), 1);
-      EXPECT_TRUE(g.node(i).children.count(i + 1));
-    }
-    if (i > 0) {
-      EXPECT_EQ(g.node(i).parents.size(), 1);
-      EXPECT_TRUE(g.node(i).parents.count(i - 1));
-    }
-  }
-  NetDef retrieved_net = g.GetNetDef();
-  compare_netdefs(retrieved_net, netdef);
-}
-
-TEST(GraphTest, TestGenerateGraphChainInPlace) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-  AddOp(&netdef, "GraphDummyOp1", {"in"}, {"out"});
-  AddOp(&netdef, "GraphDummyOp2", {"out"}, {"out"});
-  AddOp(&netdef, "GraphDummyOp1", {"out"}, {"out"});
-  AddOp(&netdef, "GraphDummyOp2", {"out"}, {"out"});
-  Graph g(netdef);
-  EXPECT_EQ(g.size(), 4);
-  for (int i = 0; i < 4; i++) {
-    if (i < 3) {
-      EXPECT_EQ(g.node(i).children.size(), 1);
-      EXPECT_TRUE(g.node(i).children.count(i + 1));
-    }
-    if (i > 0) {
-      EXPECT_EQ(g.node(i).parents.size(), 1);
-      EXPECT_TRUE(g.node(i).parents.count(i - 1));
-    }
-  }
-  NetDef retrieved_net = g.GetNetDef();
-  compare_netdefs(retrieved_net, netdef);
-}
-
-// Diamond Graph
-TEST(GraphTest, TestGenerateGraphBranch) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "GraphDummyOp1", {"in"}, {"mid1"});
-  AddOp(&netdef, "GraphDummyOp2", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "GraphDummyOp2", {"mid1"}, {"mid3"});
-  AddOp(&netdef, "GraphDummyOp3", {"mid2", "mid3"}, {"out"});
-
-  Graph g(netdef);
-
-  EXPECT_EQ(g.size(), 4);
-  EXPECT_EQ(g.node(0).parents.size(), 0);
-  EXPECT_EQ(g.node(0).children.size(), 2);
-  EXPECT_EQ(g.node(1).parents.size(), 1);
-  EXPECT_EQ(g.node(1).children.size(), 1);
-  EXPECT_EQ(g.node(2).parents.size(), 1);
-  EXPECT_EQ(g.node(2).children.size(), 1);
-  EXPECT_EQ(g.node(3).parents.size(), 2);
-  EXPECT_EQ(g.node(3).children.size(), 0);
-
-  NetDef retrieved_net = g.GetNetDef();
-  compare_netdefs(retrieved_net, netdef);
-}
-
-// Double Diamond Graph, reused names
-TEST(GraphTest, TestReusedInputs) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "GraphDummyOp1", {"in"}, {"in"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid1"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid2"});
-  AddOp(&netdef, "GraphDummyOp3", {"mid1", "mid2"}, {"in"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid1"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid2"});
-  AddOp(&netdef, "GraphDummyOp3", {"mid1", "mid2"}, {"in"});
-
-  Graph g(netdef);
-
-  EXPECT_EQ(g.size(), 7);
-  EXPECT_EQ(g.node(0).parents.size(), 0);
-  EXPECT_EQ(g.node(0).children.size(), 2);
-  EXPECT_EQ(g.node(1).parents.size(), 1);
-  EXPECT_EQ(g.node(1).children.size(), 1);
-  EXPECT_EQ(g.node(2).parents.size(), 1);
-  EXPECT_EQ(g.node(2).children.size(), 1);
-  EXPECT_EQ(g.node(3).parents.size(), 2);
-  EXPECT_EQ(g.node(3).children.size(), 2);
-  EXPECT_EQ(g.node(4).parents.size(), 1);
-  EXPECT_EQ(g.node(4).children.size(), 1);
-  EXPECT_EQ(g.node(5).parents.size(), 1);
-  EXPECT_EQ(g.node(5).children.size(), 1);
-  EXPECT_EQ(g.node(6).parents.size(), 2);
-  EXPECT_EQ(g.node(6).children.size(), 0);
-
-  NetDef retrieved_net = g.GetNetDef();
-  compare_netdefs(retrieved_net, netdef);
-}
-
-TEST(GraphTest, TestGetPerimeter) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "GraphDummyOp1", {"in"}, {"in"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid1"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid2"});
-  AddOp(&netdef, "GraphDummyOp3", {"mid1", "mid2"}, {"in"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid1"});
-  AddOp(&netdef, "GraphDummyOp2", {"in"}, {"mid2"});
-  AddOp(&netdef, "GraphDummyOp1", {"mid1", "mid2"}, {"in"});
-
-  Graph g(netdef);
-  std::vector<int> subgraph = {3};
-
-  auto subgraph_input = g.GetSubgraphInput(subgraph);
-  EXPECT_EQ(subgraph_input.size(), 2);
-  EXPECT_EQ(subgraph_input[0], std::make_pair(string("mid1"), 1));
-  EXPECT_EQ(subgraph_input[1], std::make_pair(string("mid2"), 2));
-
-  auto subgraph_output = g.GetSubgraphOutput(subgraph);
-  EXPECT_EQ(subgraph_output.size(), 2);
-  EXPECT_EQ(subgraph_output[0], std::make_pair(string("in"), 4));
-  EXPECT_EQ(subgraph_output[1], std::make_pair(string("in"), 5));
-}
-
-} // namespace
-
-} // namespace caffe2
--- a/caffe2/core/init_test.cc
+++ b/caffe2/core/init_test.cc
@ -1,72 +0,0 @@
-#include <iostream>
-#include <memory>
-
-#include <gtest/gtest.h>
-#include "caffe2/core/init.h"
-#include "caffe2/core/logging.h"
-
-namespace caffe2 {
-namespace {
-bool gTestInitFunctionHasBeenRun = false;
-bool gTestFailInitFunctionHasBeenRun = false;
-
-bool TestInitFunction(int*, char***) {
-  gTestInitFunctionHasBeenRun = true;
-  return true;
-}
-
-bool TestFailInitFunction(int*, char***) {
-  gTestFailInitFunctionHasBeenRun = true;
-  return false;
-}
-
-REGISTER_CAFFE2_INIT_FUNCTION(
-    TestInitFunction,
-    &TestInitFunction,
-    "Just a test to see if GlobalInit invokes "
-    "registered functions correctly.");
-
-int dummy_argc = 1;
-const char* dummy_name = "foo";
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-pro-type-const-cast)
-char** dummy_argv = const_cast<char**>(&dummy_name);
-} // namespace
-
-TEST(InitTest, TestInitFunctionHasRun) {
-  caffe2::GlobalInit(&dummy_argc, &dummy_argv);
-  EXPECT_TRUE(gTestInitFunctionHasBeenRun);
-  EXPECT_FALSE(gTestFailInitFunctionHasBeenRun);
-}
-
-TEST(InitTest, CanRerunGlobalInit) {
-  caffe2::GlobalInit(&dummy_argc, &dummy_argv);
-  EXPECT_TRUE(caffe2::GlobalInit(&dummy_argc, &dummy_argv));
-}
-
-void LateRegisterInitFunction() {
-  ::caffe2::InitRegisterer testInitFunc(
-      TestInitFunction, false, "This should succeed but warn");
-}
-
-void LateRegisterEarlyInitFunction() {
-  ::caffe2::InitRegisterer testSecondInitFunc(
-      TestInitFunction, true, "This should fail for early init");
-}
-
-void LateRegisterFailInitFunction() {
-  ::caffe2::InitRegisterer testSecondInitFunc(
-      TestFailInitFunction, false, "This should fail for failed init");
-}
-
-TEST(InitTest, FailLateRegisterInitFunction) {
-  caffe2::GlobalInit(&dummy_argc, &dummy_argv);
-  LateRegisterInitFunction();
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  EXPECT_THROW(LateRegisterEarlyInitFunction(), ::c10::Error);
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  EXPECT_THROW(LateRegisterFailInitFunction(), ::c10::Error);
-  EXPECT_TRUE(gTestInitFunctionHasBeenRun);
-  EXPECT_TRUE(gTestFailInitFunctionHasBeenRun);
-}
-
-} // namespace caffe2
--- a/caffe2/core/module_test.cc
+++ b/caffe2/core/module_test.cc
@ -1,78 +0,0 @@
-#include <iostream>
-#include <memory>
-
-#include "caffe2/core/module.h"
-#include "caffe2/core/operator.h"
-#include <gtest/gtest.h>
-#include "caffe2/core/logging.h"
-
-// An explicitly defined module, testing correctness when we statically link a
-// module
-CAFFE2_MODULE(caffe2_module_test_static, "Static module for testing.");
-
-namespace caffe2 {
-
-class Caffe2ModuleTestStaticDummyOp : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-  bool Run(int /* unused */ /*stream_id*/) override {
-    return true;
-  }
-  virtual string type() {
-    return "base";
-  }
-};
-
-REGISTER_CPU_OPERATOR(
-  Caffe2ModuleTestStaticDummy, Caffe2ModuleTestStaticDummyOp);
-OPERATOR_SCHEMA(Caffe2ModuleTestStaticDummy);
-
-TEST(ModuleTest, StaticModule) {
-  const string name = "caffe2_module_test_static";
-  const auto& modules = CurrentModules();
-  EXPECT_EQ(modules.count(name), 1);
-  EXPECT_TRUE(HasModule(name));
-
-  // LoadModule should not raise an error, since the module is already present.
-  LoadModule(name);
-  // Even a non-existing path should not cause error.
-  LoadModule(name, "/does/not/exist.so");
-  EXPECT_EQ(modules.count(name), 1);
-  EXPECT_TRUE(HasModule(name));
-
-  // The module will then introduce the Caffe2ModuleTestStaticDummyOp.
-  OperatorDef op_def;
-  Workspace ws;
-  op_def.set_type("Caffe2ModuleTestStaticDummy");
-  unique_ptr<OperatorBase> op = CreateOperator(op_def, &ws);
-  EXPECT_NE(nullptr, op.get());
-}
-
-#ifdef CAFFE2_BUILD_SHARED_LIBS
-TEST(ModuleTest, DynamicModule) {
-  const string name = "caffe2_module_test_dynamic";
-  const auto& modules = CurrentModules();
-  EXPECT_EQ(modules.count(name), 0);
-  EXPECT_FALSE(HasModule(name));
-
-  // Before loading, we should not be able to create the op.
-  OperatorDef op_def;
-  Workspace ws;
-  op_def.set_type("Caffe2ModuleTestDynamicDummy");
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  EXPECT_THROW(
-      CreateOperator(op_def, &ws),
-      EnforceNotMet);
-
-  // LoadModule should load the proper module.
-  LoadModule(name);
-  EXPECT_EQ(modules.count(name), 1);
-  EXPECT_TRUE(HasModule(name));
-
-  // The module will then introduce the Caffe2ModuleTestDynamicDummyOp.
-  unique_ptr<OperatorBase> op_after_load = CreateOperator(op_def, &ws);
-  EXPECT_NE(nullptr, op_after_load.get());
-}
-#endif
-
-}  // namespace caffe2
--- a/caffe2/core/net_async_tracing_test.cc
+++ b/caffe2/core/net_async_tracing_test.cc
@ -1,114 +0,0 @@
-/**
- * Copyright (c) 2016-present, Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include "caffe2/core/net_async_tracing.h"
-
-namespace caffe2 {
-
-namespace tracing {
-
-void testExtractShardId(const string& name, int expectedId) {
-  EXPECT_EQ(extractShardId(name), expectedId);
-}
-
-TEST(NetAsyncTracingTest, ExtractShardId) {
-  testExtractShardId("ABCDEFshard:1705!!A", 1705);
-  // Should use the last one
-  testExtractShardId("ABCDEFshard:4324!!Ashard:01220b", 1220);
-  // Nothing to extract
-  testExtractShardId("ABCDEFsha:222", -1);
-  // Regular cases
-  testExtractShardId("FC:shard:0", 0);
-  testExtractShardId("FC:shard:10", 10);
-  testExtractShardId("FC:shard:15", 15);
-}
-
-TEST(NetAsyncTracingTest, EveryKIteration) {
-  const auto spec = R"DOC(
-      name: "example"
-      type: "async_scheduling"
-      arg {
-        name: "enable_tracing"
-        i: 1
-      }
-      arg {
-        name: "tracing_mode"
-        s: "EVERY_K_ITERATIONS"
-      }
-      arg {
-        name: "tracing_filepath"
-        s: "/tmp"
-      }
-      arg {
-        name: "trace_every_nth_batch"
-        i: 1
-      }
-      arg {
-        name: "dump_every_nth_batch"
-        i: 1
-      }
-      op {
-        output: "out"
-        type: "UniformFill"
-      }
-)DOC";
-
-  NetDef net_def;
-  CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
-
-  Workspace ws;
-  std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
-  net->Run();
-}
-
-TEST(NetAsyncTracingTest, GlobalTimeSlice) {
-  const auto spec = R"DOC(
-      name: "example"
-      type: "async_scheduling"
-      arg {
-        name: "enable_tracing"
-        i: 1
-      }
-      arg {
-        name: "tracing_filepath"
-        s: "/tmp"
-      }
-      arg {
-        name: "trace_for_n_ms"
-        i: 1
-      }
-      arg {
-        name: "trace_every_n_ms"
-        i: 1
-      }
-      op {
-        output: "out"
-        type: "UniformFill"
-      }
-)DOC";
-
-  NetDef net_def;
-  CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
-
-  Workspace ws;
-  std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
-  net->Run();
-}
-
-} // namespace tracing
-
-} // namespace caffe2
--- a/caffe2/core/net_dag_utils_test.cc
+++ b/caffe2/core/net_dag_utils_test.cc
@ -1,296 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/net_dag_utils.h"
-#include "caffe2/core/operator.h"
-
-namespace caffe2 {
-
-namespace {
-class DummySyncOp final : public Operator<CPUContext> {
- public:
-  DummySyncOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    return true;
-  }
-};
-
-class DummyAsyncOp final : public Operator<CPUContext> {
- public:
-  DummyAsyncOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    return true;
-  }
-
-  bool HasAsyncPart() const override {
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(DagUtilTestDummySync, DummySyncOp);
-REGISTER_CPU_OPERATOR(DagUtilTestDummyAsync, DummyAsyncOp);
-
-OPERATOR_SCHEMA(DagUtilTestDummySync)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX);
-OPERATOR_SCHEMA(DagUtilTestDummyAsync)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX);
-
-class DagUtilTestContext {
- public:
-  DagUtilTestContext(const std::string& spec, Workspace* ws) {
-    net_def_ = std::make_shared<NetDef>();
-    CAFFE_ENFORCE(TextFormat::ParseFromString(spec, net_def_.get()));
-    operator_nodes_ = dag_utils::prepareOperatorNodes(net_def_, ws);
-  }
-
-  dag_utils::ExecutionChains computeChains() {
-    return dag_utils::computeGroups(operator_nodes_);
-  }
-
- private:
-  std::shared_ptr<NetDef> net_def_{nullptr};
-  std::vector<dag_utils::OperatorNode> operator_nodes_;
-};
-
-void PrintChains(const dag_utils::ExecutionChains& chains) {
-  for (const auto& kv : chains) {
-    std::stringstream ss;
-    ss << kv.first << ": ";
-    for (const auto& v : kv.second) {
-      ss << v << ", ";
-    }
-    LOG(INFO) << ss.str();
-  }
-}
-} // namespace
-
-TEST(DagUtilTest, Empty) {
-  const auto spec = R"DOC(
-    name: "test0"
-    type: "async_scheduling"
-    )DOC";
-  Workspace ws;
-  DagUtilTestContext t(spec, &ws);
-  auto chains = t.computeChains();
-  EXPECT_TRUE(chains.empty());
-}
-
-// 4 sync ops forming a diamond
-TEST(DagUtilTest, AllSync) {
-  const auto spec = R"DOC(
-    name: "test1"
-    type: "async_scheduling"
-    external_input: "in"
-    op {
-      input: "in"
-      output: "n1"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n1"
-      output: "n2"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n1"
-      output: "n3"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n2"
-      input: "n3"
-      output: "out"
-      type: "DagUtilTestDummySync"
-    }
-    )DOC";
-  Workspace ws;
-  ws.CreateBlob("in");
-  DagUtilTestContext t(spec, &ws);
-  auto chains = t.computeChains();
-  dag_utils::ExecutionChains expected{{0, {0, 1, 2, 3}}};
-  EXPECT_EQ(chains, expected);
-}
-
-// 3 async ops forming an L shape
-TEST(DagUtilTest, AllAsync) {
-  const auto spec = R"DOC(
-    name: "test2"
-    type: "async_scheduling"
-    external_input: "in0"
-    external_input: "in1"
-    op {
-      input: "in0"
-      output: "n1"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "in1"
-      output: "n2"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "n1"
-      output: "n3"
-      type: "DagUtilTestDummyAsync"
-    }
-    )DOC";
-  Workspace ws;
-  ws.CreateBlob("in0");
-  ws.CreateBlob("in1");
-  DagUtilTestContext t(spec, &ws);
-  auto chains = t.computeChains();
-  dag_utils::ExecutionChains expected{{0, {0}}, {1, {1}}, {2, {2}}};
-  EXPECT_EQ(chains, expected);
-}
-
-// 3 sync ops and 1 async op (#2) forming a diamond
-TEST(DagUtilTest, Mixed0) {
-  const auto spec = R"DOC(
-    name: "test3"
-    type: "async_scheduling"
-    external_input: "in"
-    op {
-      input: "in"
-      output: "n1"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n1"
-      output: "n2"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n1"
-      output: "n3"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "n2"
-      input: "n3"
-      output: "out"
-      type: "DagUtilTestDummySync"
-    }
-    )DOC";
-  Workspace ws;
-  ws.CreateBlob("in");
-  DagUtilTestContext t(spec, &ws);
-  auto chains = t.computeChains();
-  dag_utils::ExecutionChains expected{{0, {0, 1}}, {2, {2}}, {3, {3}}};
-  EXPECT_EQ(chains, expected);
-}
-
-// 3 sync ops and 1 async op (#2) forming a Y shape
-TEST(DagUtilTest, Mixed1) {
-  const auto spec = R"DOC(
-    name: "test3"
-    type: "async_scheduling"
-    external_input: "in0"
-    external_input: "in1"
-    op {
-      input: "in0"
-      output: "n1"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "in1"
-      output: "n2"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n1"
-      input: "n2"
-      output: "n3"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "n3"
-      output: "out"
-      type: "DagUtilTestDummySync"
-    }
-    )DOC";
-  Workspace ws;
-  ws.CreateBlob("in0");
-  ws.CreateBlob("in1");
-  DagUtilTestContext t(spec, &ws);
-  auto chains = t.computeChains();
-  dag_utils::ExecutionChains expected{{0, {0, 1}}, {2, {2}}, {3, {3}}};
-  EXPECT_EQ(chains, expected);
-}
-// More complicated mixed case. * means async
-//  0* -> 1* -> 2
-//    |
-//  3 -> 4 -> 5
-//  |  |
-//  |    6
-//   - -> 8*
-//  7* -/
-TEST(DagUtilTest, Mixed2) {
-  const auto spec = R"DOC(
-    name: "test4"
-    type: "async_scheduling"
-    external_input: "in0"
-    external_input: "in1"
-    external_input: "in2"
-    op {
-      input: "in0"
-      output: "n1"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "n1"
-      output: "n2"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "n2"
-      output: "out0"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "in1"
-      output: "n3"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n1"
-      input: "n3"
-      output: "n4"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n4"
-      output: "out1"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "n3"
-      output: "out2"
-      type: "DagUtilTestDummySync"
-    }
-    op {
-      input: "in2"
-      output: "n7"
-      type: "DagUtilTestDummyAsync"
-    }
-    op {
-      input: "n3"
-      input: "n7"
-      output: "out3"
-      type: "DagUtilTestDummyAsync"
-    }
-    )DOC";
-  Workspace ws;
-  ws.CreateBlob("in0");
-  ws.CreateBlob("in1");
-  ws.CreateBlob("in2");
-  DagUtilTestContext t(spec, &ws);
-  auto chains = t.computeChains();
-  dag_utils::ExecutionChains expected{
-      {0, {0}}, {1, {1}}, {3, {3, 6}}, {4, {4, 2, 5}}, {7, {7}}, {8, {8}}};
-  EXPECT_EQ(chains, expected);
-}
-} // namespace caffe2
--- a/caffe2/core/net_gpu_test.cc
+++ b/caffe2/core/net_gpu_test.cc
@ -1,130 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/common_gpu.h"
-#include "caffe2/core/net.h"
-#include "caffe2/core/net_async_base.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/core/scope_guard.h"
-
-namespace caffe2 {
-
-namespace {
-
-static std::atomic<int> counter;
-
-// A net test dummy op that does nothing but scaffolding. Here, we
-// inherit from OperatorBase because we instantiate on both CPU and
-// GPU. In general, you want to only inherit from Operator<Context>.
-class NetTestDummyOp final : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-
-  NetTestDummyOp(const OperatorDef& operator_def, Workspace* ws)
-      : OperatorBase(operator_def, ws),
-        fail_(OperatorBase::GetSingleArgument<bool>("fail", false)) {}
-
-  bool Run(int /* unused */ /*stream_id*/) override {
-    if (fail_) {
-      return false;
-    }
-    counter.fetch_add(1);
-    return true;
-  }
-
-  // Simulate CUDA operator behavior
-  bool HasAsyncPart() const override {
-    return debug_def().device_option().device_type() == PROTO_CUDA;
-  }
-
-  bool SupportsAsyncScheduling() const override {
-    return debug_def().device_option().device_type() == PROTO_CUDA;
-  }
-
- protected:
-  const bool fail_;
-};
-
-REGISTER_CPU_OPERATOR(NetTestDummy, NetTestDummyOp);
-REGISTER_CUDA_OPERATOR(NetTestDummy, NetTestDummyOp);
-REGISTER_CPU_OPERATOR(NetTestDummy2, NetTestDummyOp);
-REGISTER_CUDA_OPERATOR(NetTestDummy2, NetTestDummyOp);
-
-OPERATOR_SCHEMA(NetTestDummy)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-OPERATOR_SCHEMA(NetTestDummy2)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{1, 0}});
-
-}  // namespace
-
-void testExecution(std::unique_ptr<NetBase>& net, int num_ops) {
-  // Run 100 times
-  for (int i = 0; i < 100; i++) {
-    counter.exchange(0);
-    net.get()->Run();
-    ASSERT_EQ(num_ops, counter.load());
-  }
-}
-
-void checkChainingAndRun(
-    const char* spec,
-    const dag_utils::ExecutionChains& expected) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef net_def;
-  CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
-  {
-    net_def.set_num_workers(4);
-    std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
-    auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
-    TORCH_CHECK_NOTNULL(dag);
-    const auto& chains = dag->TEST_execution_chains();
-    EXPECT_EQ(chains, expected);
-    testExecution(net, net_def.op().size());
-  }
-}
-
-TEST(NetTest, DISABLED_ChainingForDifferentDevices) {
-  const auto spec = R"DOC(
-        name: "example"
-        type: "dag"
-        external_input: "in"
-        op {
-          input: "in"
-          output: "hidden"
-          type: "NetTestDummy"
-        }
-        op {
-          input: "hidden"
-          output: "out"
-          type: "NetTestDummy"
-          device_option {
-            device_type: 1
-          }
-        }
-        op {
-          input: "out"
-          output: "out2"
-          type: "NetTestDummy"
-          device_option {
-            device_type: 1
-          }
-        }
-        op {
-          input: "out2"
-          output: "out3"
-          type: "NetTestDummy"
-          device_option {
-            device_type: 1
-            device_id: 1
-          }
-        }
-)DOC";
-  if (HasCudaGPU() && NumCudaDevices() >= 2) {
-    checkChainingAndRun(spec, {{0, {0, 1, 2}}, {3, {3}}});
-  }
-}
-
-} // namespace caffe2
--- a/caffe2/core/net_simple_refcount_test.cc
+++ b/caffe2/core/net_simple_refcount_test.cc
@ -1,70 +0,0 @@
-#include <gtest/gtest.h>
-#include "c10/util/StringUtil.h"
-#include "caffe2/core/net.h"
-#include "caffe2/core/net_async_scheduling.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/core/scope_guard.h"
-
-#include <google/protobuf/text_format.h>
-
-namespace caffe2 {
-
-namespace {
-
-// A net test dummy op that does nothing but scaffolding. Here, we
-// inherit from OperatorBase because we instantiate on both CPU and
-// GPU. In general, you want to only inherit from Operator<Context>.
-class NetSimpleRefCountTestOp final : public Operator<CPUContext> {
- public:
-  NetSimpleRefCountTestOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-  USE_OPERATOR_FUNCTIONS(CPUContext);
-
-  bool RunOnDevice() override {
-    const int32_t& input = OperatorBase::Input<int32_t>(0);
-    int32_t* output = OperatorBase::Output<int32_t>(0);
-    *output = input + 1;
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(NetSimpleRefCountTest, NetSimpleRefCountTestOp);
-
-OPERATOR_SCHEMA(NetSimpleRefCountTest).NumInputs(1).NumOutputs(1);
-
-TEST(NetSimpleRefCountTest, TestCorrectness) {
-  Workspace ws;
-  *(ws.CreateBlob("a")->GetMutable<int32_t>()) = 1;
-  NetDef net_def;
-  net_def.set_type("simple_refcount");
-  net_def.add_op()->CopyFrom(
-      CreateOperatorDef("NetSimpleRefCountTest", "", {"a"}, {"b"}));
-  net_def.add_op()->CopyFrom(
-      CreateOperatorDef("NetSimpleRefCountTest", "", {"b"}, {"c"}));
-  net_def.add_op()->CopyFrom(
-      CreateOperatorDef("NetSimpleRefCountTest", "", {"b"}, {"d"}));
-  net_def.add_op()->CopyFrom(
-      CreateOperatorDef("NetSimpleRefCountTest", "", {"c"}, {"e"}));
-  // After execution, what should look like is:
-  // a = 1
-  // b = deallocated
-  // c = deallocated
-  // d = 3
-  // e = 4
-  std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));
-  net->Run();
-  // Note on ASSERT vs EXPECT: ASSERT will quit directly if condition not
-  // met, which is why we guard IsType<> calls with ASSERT so that the
-  // subsequent Get() calls do not product an exception.
-  ASSERT_TRUE(ws.GetBlob("a")->IsType<int32_t>());
-  EXPECT_EQ(ws.GetBlob("a")->Get<int32_t>(), 1);
-  EXPECT_EQ(ws.GetBlob("b")->GetRaw(), nullptr);
-  EXPECT_EQ(ws.GetBlob("c")->GetRaw(), nullptr);
-  ASSERT_TRUE(ws.GetBlob("d")->IsType<int32_t>());
-  EXPECT_EQ(ws.GetBlob("d")->Get<int32_t>(), 3);
-  ASSERT_TRUE(ws.GetBlob("e")->IsType<int32_t>());
-  EXPECT_EQ(ws.GetBlob("e")->Get<int32_t>(), 4);
-}
-
-} // namespace
-} // namespace caffe2
--- a/caffe2/core/net_test.cc
+++ b/caffe2/core/net_test.cc
--- a/caffe2/core/observer_test.cc
+++ b/caffe2/core/observer_test.cc
@ -1,183 +0,0 @@
-#include <gtest/gtest.h>
-#include "c10/util/Registry.h"
-#include "caffe2/core/common.h"
-#include "caffe2/core/net.h"
-#include "caffe2/core/net_simple.h"
-#include "caffe2/core/observer.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/core/scope_guard.h"
-
-namespace caffe2 {
-
-namespace {
-
-static std::atomic<int> counter;
-
-template <class T>
-class DummyObserver final : public ObserverBase<T> {
- public:
-  explicit DummyObserver<T>(T* subject_) : ObserverBase<T>(subject_) {}
-  void Start() override;
-  void Stop() override;
-
-  // NOLINTNEXTLINE(modernize-use-equals-default)
-  ~DummyObserver() override {}
-};
-
-template <>
-void DummyObserver<NetBase>::Start() {
-  vector<OperatorBase*> operators = subject_->GetOperators();
-  for (auto& op : operators) {
-    op->AttachObserver(std::make_unique<DummyObserver<OperatorBase>>(op));
-  }
-  counter.fetch_add(1000);
-}
-
-template <>
-void DummyObserver<OperatorBase>::Start() {
-  counter.fetch_add(100);
-}
-
-template <>
-void DummyObserver<NetBase>::Stop() {
-  counter.fetch_add(10);
-}
-
-template <>
-void DummyObserver<OperatorBase>::Stop() {
-  counter.fetch_add(1);
-}
-
-class ObsTestDummyOp final : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-  bool Run(int /* unused */) override {
-    StartAllObservers();
-    StopAllObservers();
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(ObsTestDummy, ObsTestDummyOp);
-REGISTER_CUDA_OPERATOR(ObsTestDummy, ObsTestDummyOp);
-
-OPERATOR_SCHEMA(ObsTestDummy)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-unique_ptr<NetBase> CreateNetTestHelper(Workspace* ws, bool isDAG = false) {
-  NetDef net_def;
-  if (isDAG) {
-    net_def.set_type("dag");
-  }
-  {
-    auto& op = *(net_def.add_op());
-    op.set_type("ObsTestDummy");
-    op.add_input("in");
-    op.add_output("hidden");
-  }
-  {
-    auto& op = *(net_def.add_op());
-    op.set_type("ObsTestDummy");
-    op.add_input("hidden");
-    op.add_output("out");
-  }
-  net_def.add_external_input("in");
-  net_def.add_external_output("out");
-
-  return CreateNet(net_def, ws);
-}
-}
-
-TEST(ObserverTest, TestNotify) {
-  auto count_before = counter.load();
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef net_def;
-  unique_ptr<NetBase> net(CreateNetTestHelper(&ws));
-  EXPECT_EQ(caffe2::dynamic_cast_if_rtti<SimpleNet*>(net.get()), net.get());
-  unique_ptr<DummyObserver<NetBase>> net_ob =
-      make_unique<DummyObserver<NetBase>>(net.get());
-  net.get()->AttachObserver(std::move(net_ob));
-  net.get()->Run();
-  auto count_after = counter.load();
-  EXPECT_EQ(1212, count_after - count_before);
-}
-
-TEST(ObserverTest, TestUniqueMap) {
-  auto count_before = counter.load();
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef net_def;
-  unique_ptr<NetBase> net(CreateNetTestHelper(&ws));
-  EXPECT_EQ(caffe2::dynamic_cast_if_rtti<SimpleNet*>(net.get()), net.get());
-  unique_ptr<DummyObserver<NetBase>> net_ob =
-      make_unique<DummyObserver<NetBase>>(net.get());
-  auto* ref = net.get()->AttachObserver(std::move(net_ob));
-  net.get()->Run();
-  unique_ptr<Observable<NetBase>::Observer> test =
-      net.get()->DetachObserver(ref);
-  auto count_after = counter.load();
-  EXPECT_EQ(1212, count_after - count_before);
-}
-
-TEST(ObserverTest, TestNotifyAfterDetach) {
-  auto count_before = counter.load();
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef net_def;
-  unique_ptr<NetBase> net(CreateNetTestHelper(&ws));
-  unique_ptr<DummyObserver<NetBase>> net_ob =
-      make_unique<DummyObserver<NetBase>>(net.get());
-  auto* ob = net.get()->AttachObserver(std::move(net_ob));
-  net.get()->DetachObserver(ob);
-  net.get()->Run();
-  auto count_after = counter.load();
-  EXPECT_EQ(0, count_after - count_before);
-}
-
-TEST(ObserverTest, TestDAGNetBase) {
-  auto count_before = counter.load();
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef net_def;
-  unique_ptr<NetBase> net(CreateNetTestHelper(&ws, true));
-  unique_ptr<DummyObserver<NetBase>> net_ob =
-      make_unique<DummyObserver<NetBase>>(net.get());
-  net.get()->AttachObserver(std::move(net_ob));
-  net.get()->Run();
-  auto count_after = counter.load();
-  EXPECT_EQ(1212, count_after - count_before);
-}
-
-#if 0
-// This test intermittently segfaults,
-// see https://github.com/pytorch/pytorch/issues/9137
-TEST(ObserverTest, TestMultipleNetBase) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef net_def;
-  unique_ptr<NetBase> net(CreateNetTestHelper(&ws, true));
-  EXPECT_EQ(caffe2::dynamic_cast_if_rtti<NetBase*>(net.get()), net.get());
-
-  // There may be some default observers
-  const size_t prev_num = net.get()->NumObservers();
-  const int num_tests = 100;
-  vector<const Observable<NetBase>::Observer*> observers;
-  for (int i = 0; i < num_tests; ++i) {
-    unique_ptr<DummyObserver<NetBase>> net_ob =
-        make_unique<DummyObserver<NetBase>>(net.get());
-    observers.emplace_back(net.get()->AttachObserver(std::move(net_ob)));
-  }
-
-  net.get()->Run();
-
-  for (const auto& observer : observers) {
-    net.get()->DetachObserver(observer);
-  }
-
-  EXPECT_EQ(net.get()->NumObservers(), prev_num);
-}
-#endif
-} // namespace caffe2
--- a/caffe2/core/operator_gpu_test.cc
+++ b/caffe2/core/operator_gpu_test.cc
@ -1,63 +0,0 @@
-#include <string>
-
-#include <gtest/gtest.h>
-#include "caffe2/core/common_gpu.h"
-#include "caffe2/core/operator.h"
-
-namespace caffe2 {
-
-class JustTest : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-  bool Run(int /* unused */ /*stream_id*/) override {
-    return true;
-  }
-  virtual std::string type() {
-    return "BASE";
-  }
-};
-
-class JustTestCUDA : public JustTest {
- public:
-  using JustTest::JustTest;
-  bool Run(int /* unused */ /*stream_id*/) override {
-    return true;
-  }
-  std::string type() override {
-    return "CUDA";
-  }
-};
-
-class JustTestCUDNN : public JustTest {
- public:
-  using JustTest::JustTest;
-  bool Run(int /* unused */ /*stream_id*/) override {
-    return true;
-  }
-  std::string type() override {
-    return "CUDNN";
-  }
-};
-
-OPERATOR_SCHEMA(JustTest).NumInputs(0, 1).NumOutputs(0, 1);
-REGISTER_CUDA_OPERATOR(JustTest, JustTestCUDA);
-REGISTER_CUDNN_OPERATOR(JustTest, JustTestCUDNN);
-
-TEST(EnginePrefTest, GPUDeviceDefaultPreferredEngines) {
-  if (!HasCudaGPU())
-    return;
-  OperatorDef op_def;
-  Workspace ws;
-  op_def.mutable_device_option()->set_device_type(PROTO_CUDA);
-  op_def.set_type("JustTest");
-
-  {
-    const auto op = CreateOperator(op_def, &ws);
-    EXPECT_NE(nullptr, op.get());
-    // CUDNN should be taken as it's in the default global preferred engines
-    // list
-    EXPECT_EQ(static_cast<JustTest*>(op.get())->type(), "CUDNN");
-  }
-}
-
-} // namespace caffe2
--- a/caffe2/core/operator_schema_test.cc
+++ b/caffe2/core/operator_schema_test.cc
@ -1,279 +0,0 @@
-#include "caffe2/core/logging.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/core/operator_schema.h"
-#include "caffe2/utils/proto_utils.h"
-
-#include <gtest/gtest.h>
-
-namespace caffe2 {
-
-OPERATOR_SCHEMA(OpSchemaTestOp)
-  .NumInputs(1).NumOutputs(1)
-  .SetDoc(R"DOC(Test Documentation)DOC")
-  .Input(0, "in0", "dummy input.")
-  .Output(0, "out0", "dummy output.");
-
-TEST(OperatorSchemaTest, BasicSchema) {
-  const OpSchema* schema = OpSchemaRegistry::Schema("OpSchemaTestOp");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  EXPECT_TRUE(schema != nullptr);
-  EXPECT_TRUE(schema->doc() != nullptr);
-  OperatorDef def1 = CreateOperatorDef(
-      "OpSchemaTestOp", "",
-      vector<string>{"in"}, vector<string>{"out"});
-  EXPECT_TRUE(schema->Verify(def1));
-  OperatorDef def2 = CreateOperatorDef(
-      "OpSchemaTestOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out"});
-  EXPECT_FALSE(schema->Verify(def2));
-  OperatorDef def3 = CreateOperatorDef(
-      "OpSchemaTestOp", "",
-      vector<string>{"in"}, vector<string>{"out1", "out2"});
-  EXPECT_FALSE(schema->Verify(def3));
-}
-
-OPERATOR_SCHEMA(OpSchemaSpecifiedInputOutputOp)
-  .NumInputs({2, 4}).NumOutputs({1, 3});
-
-TEST(OperatorSchemaTest, SpecifiedInputOutput) {
-  const OpSchema* schema
-      = OpSchemaRegistry::Schema("OpSchemaSpecifiedInputOutputOp");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  EXPECT_TRUE(schema != nullptr);
-  OperatorDef def1 = CreateOperatorDef(
-      "OpSchemaSpecifiedInputOutputOp", "",
-      vector<string>{"in"}, vector<string>{"out"});
-  EXPECT_FALSE(schema->Verify(def1));
-  OperatorDef def2 = CreateOperatorDef(
-      "OpSchemaSpecifiedInputOutputOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out"});
-  EXPECT_TRUE(schema->Verify(def2));
-  OperatorDef def3 = CreateOperatorDef(
-      "OpSchemaSpecifiedInputOutputOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "out2"});
-  EXPECT_FALSE(schema->Verify(def3));
-}
-
-OPERATOR_SCHEMA(OpSchemaInputOutputRelationOp)
-    .NumInputsOutputs([](int in, int out) {
-      return out == in || out == in * 2;
-    });
-
-TEST(OperatorSchemaTest, InputOutputRelation) {
-  const OpSchema* schema
-      = OpSchemaRegistry::Schema("OpSchemaInputOutputRelationOp");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  EXPECT_TRUE(schema != nullptr);
-  OperatorDef def1 = CreateOperatorDef(
-      "OpSchemaInputOutputRelationOp", "",
-      vector<string>{"in"}, vector<string>{"out"});
-  EXPECT_TRUE(schema->Verify(def1));
-  OperatorDef def2 = CreateOperatorDef(
-      "OpSchemaInputOutputRelationOp", "",
-      vector<string>{"in"}, vector<string>{"out1", "out2"});
-  EXPECT_TRUE(schema->Verify(def2));
-  OperatorDef def3 = CreateOperatorDef(
-      "OpSchemaInputOutputRelationOp", "",
-      vector<string>{"in1", "in2", "in3"}, vector<string>{"out1", "out2"});
-  EXPECT_FALSE(schema->Verify(def3));
-}
-
-OPERATOR_SCHEMA(OpSchemaSameInputOutputOp)
-    .SameNumberOfOutput();
-
-TEST(OperatorSchemaTest, SameInputOutput) {
-  const OpSchema* schema =
-      OpSchemaRegistry::Schema("OpSchemaSameInputOutputOp");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  OperatorDef def1 = CreateOperatorDef(
-      "OpSchemaSameInputOutputOp", "",
-      vector<string>{"in"}, vector<string>{"out"});
-  EXPECT_TRUE(schema->Verify(def1));
-  OperatorDef def2 = CreateOperatorDef(
-      "OpSchemaSameInputOutputOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "out2"});
-  EXPECT_TRUE(schema->Verify(def2));
-  OperatorDef def3 = CreateOperatorDef(
-      "OpSchemaSameInputOutputOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "out2", "out3"});
-  EXPECT_FALSE(schema->Verify(def3));
-}
-
-OPERATOR_SCHEMA(OpSchemaCalculateOutputOp)
-    .NumInputs(1, 5).NumOutputs(2, 6)
-    .OutputCalculator([](int n) { return n + 1; });
-
-TEST(OperatorSchemaTest, CalculateOutput) {
-  const OpSchema* schema =
-      OpSchemaRegistry::Schema("OpSchemaCalculateOutputOp");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  OperatorDef def1 = CreateOperatorDef(
-      "OpSchemaCalculateOutputOp", "",
-      vector<string>{"in"}, vector<string>{"out"});
-  EXPECT_FALSE(schema->Verify(def1));
-  OperatorDef def2 = CreateOperatorDef(
-      "OpSchemaCalculateOutputOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "out2"});
-  EXPECT_FALSE(schema->Verify(def2));
-  OperatorDef def3 = CreateOperatorDef(
-      "OpSchemaCalculateOutputOp", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "out2", "out3"});
-  EXPECT_TRUE(schema->Verify(def3));
-}
-
-OPERATOR_SCHEMA(OpSchemaInplace)
-    .NumInputs(2).NumOutputs(2)
-    .AllowInplace({{0, 0}})
-    .EnforceInplace({{1, 1}});
-
-TEST(OperatorSchemaTest, Inplace) {
-  const OpSchema* schema =
-      OpSchemaRegistry::Schema("OpSchemaInplace");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  OperatorDef def1 = CreateOperatorDef(
-      "OpSchemaInplace", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "in2"});
-  EXPECT_TRUE(schema->Verify(def1));
-  OperatorDef def2 = CreateOperatorDef(
-      "OpSchemaInplace", "",
-      vector<string>{"in1", "in2"}, vector<string>{"in1", "in2"});
-  EXPECT_TRUE(schema->Verify(def2));
-  OperatorDef def3 = CreateOperatorDef(
-      "OpSchemaInplace", "",
-      vector<string>{"in1", "in2"}, vector<string>{"in1", "out2"});
-  EXPECT_FALSE(schema->Verify(def3));
-  OperatorDef def4 = CreateOperatorDef(
-      "OpSchemaInplace", "",
-      vector<string>{"in1", "in2"}, vector<string>{"out1", "out2"});
-  EXPECT_FALSE(schema->Verify(def4));
-}
-
-OPERATOR_SCHEMA(OpSchemaSameInputOutputTensorInference).IdenticalTypeAndShape();
-
-TEST(OperatorSchemaTest, TensorInferenceIdentical) {
-  const OpSchema* schema =
-      OpSchemaRegistry::Schema("OpSchemaSameInputOutputTensorInference");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  OperatorDef def = CreateOperatorDef(
-      "OpSchemaSameInputOutputTensorInference",
-      "",
-      vector<string>{"in"},
-      vector<string>{"out"});
-  vector<TensorShape> shapes(1);
-  shapes[0].set_data_type(TensorProto::FLOAT);
-  shapes[0].add_dims(1);
-  shapes[0].add_dims(2);
-  shapes[0].add_dims(3);
-  vector<TensorShape> out = schema->InferTensor(def, shapes);
-  EXPECT_EQ(out.size(), 1);
-  EXPECT_EQ(out[0].SerializeAsString(), shapes[0].SerializeAsString());
-}
-
-OPERATOR_SCHEMA(OpSchemaArbitraryTensorInference)
-    .TensorInferenceFunction(
-        [](const OperatorDef&, const vector<TensorShape>&) {
-          vector<TensorShape> shapes(1);
-          shapes[0].set_data_type(TensorProto::FLOAT);
-          shapes[0].add_dims(1701);
-          return shapes;
-        });
-
-TEST(OperatorSchemaTest, TensorInferenceArbitrary) {
-  const OpSchema* schema =
-      OpSchemaRegistry::Schema("OpSchemaArbitraryTensorInference");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  OperatorDef def = CreateOperatorDef(
-      "OpSchemaArbitraryTensorInference",
-      "",
-      vector<string>{"in"},
-      vector<string>{"out"});
-  vector<TensorShape> out = schema->InferTensor(def, vector<TensorShape>());
-  EXPECT_EQ(out.size(), 1);
-  EXPECT_EQ(out[0].data_type(), TensorProto::FLOAT);
-  EXPECT_EQ(out[0].dims_size(), 1);
-  EXPECT_EQ(out[0].dims(0), 1701);
-}
-
-TEST(OperatorSchemaTest, TestCastSchema) {
-  // This tests a use case of the schema: the Cast op takes in the def and
-  // deduces the
-  // schema from the "to" argument.
-  const OpSchema* schema = OpSchemaRegistry::Schema("Cast");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  if (!schema) {
-    // Compiled without the Cast op.
-    return;
-  }
-  OperatorDef def = CreateOperatorDef(
-      "Cast",
-      "",
-      vector<string>{"in"},
-      vector<string>{"out"},
-      vector<Argument>{MakeArgument<int64_t>("to", TensorProto::UINT8)});
-  vector<TensorShape> out = schema->InferTensor(def, vector<TensorShape>(1));
-  EXPECT_EQ(out.size(), 1);
-  // Data type should be inferred.
-  EXPECT_EQ(out[0].data_type(), TensorProto::UINT8);
-  // Dim should not be set (same as input);
-  EXPECT_EQ(out[0].dims_size(), 0);
-}
-
-OPERATOR_SCHEMA(OpSchemaCostInference)
-    .NumInputs(2)
-    .NumOutputs(2)
-    .CostInferenceFunction([](const OperatorDef& /*def*/,
-                              const vector<TensorShape>& inputs) {
-      struct OpSchema::Cost c;
-      c.flops = 2 * inputs[0].dims(0) * inputs[0].dims(1) * inputs[1].dims(1);
-      return c;
-    });
-
-TEST(OperatorSchemaTest, TestCostInference) {
-  const OpSchema* schema = OpSchemaRegistry::Schema("OpSchemaCostInference");
-#ifdef CAFFE2_NO_OPERATOR_SCHEMA
-  EXPECT_TRUE(schema == nullptr);
-  return;
-#endif
-  if (!schema) {
-    return;
-  }
-  OperatorDef def = CreateOperatorDef(
-      "OpSchemaCostInference", "", vector<string>{"in"}, vector<string>{"out"});
-  vector<TensorShape> shapes(2);
-  shapes[0].set_data_type(TensorProto::FLOAT);
-  shapes[0].add_dims(10);
-  shapes[0].add_dims(10);
-  shapes[1].set_data_type(TensorProto::FLOAT);
-  shapes[1].add_dims(10);
-  shapes[1].add_dims(10);
-  EXPECT_EQ(2000, schema->InferCost(def, shapes).flops);
-}
-
-}  // namespace caffe2
--- a/caffe2/core/operator_test.cc
+++ b/caffe2/core/operator_test.cc
@ -1,634 +0,0 @@
-#include <iostream>
-
-#include "caffe2/core/net.h"
-#include "caffe2/core/operator.h"
-#include <gtest/gtest.h>
-
-namespace caffe2 {
-
-// This op is instantiated on both CPU and GPU but must not depend on
-// CUDAContext, so it derives from OperatorBase directly. In general, you only
-// want to inherit from Operator<Context> in your code.
-class JustTest : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-
-  // Does no work and always reports success.
-  bool Run(int /*stream_id*/) override { return true; }
-
-  // Tag the tests inspect to tell which registered class was instantiated.
-  virtual string type() { return "base"; }
-};
-
-// JustTest variant whose constructor always throws
-// UnsupportedOperatorFeature, i.e. an implementation that can never be built.
-class JustTestAndNeverConstructs : public JustTest {
- public:
-  JustTestAndNeverConstructs(const OperatorDef& def, Workspace* ws)
-      : JustTest(def, ws) {
-    throw UnsupportedOperatorFeature("I just don't construct.");
-  }
-
-  bool Run(int /*stream_id*/) override { return true; }
-
-  string type() override { return "FOO"; }
-};
-
-// JustTest variant that constructs normally and identifies itself as "BAR".
-class JustTestAndDoesConstruct : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  bool Run(int /*stream_id*/) override { return true; }
-
-  string type() override { return "BAR"; }
-};
-
-// JustTest variant that writes the int value 5 into its first output blob.
-class JustTestWithSomeOutput : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  bool Run(int /*stream_id*/) override {
-    int* first_output = OperatorBase::Output<int>(0);
-    *first_output = 5;
-    return true;
-  }
-
-  string type() override { return "SETTING_SOME_OUTPUT"; }
-};
-
-// Schemas: JustTest and JustTestCPUOnly accept 0-1 inputs and 0-1 outputs;
-// JustTestWithSomeOutput declares no input/output count constraints.
-OPERATOR_SCHEMA(JustTest).NumInputs(0, 1).NumOutputs(0, 1);
-OPERATOR_SCHEMA(JustTestCPUOnly).NumInputs(0, 1).NumOutputs(0, 1);
-OPERATOR_SCHEMA(JustTestWithSomeOutput);
-
-// CPU registrations. JustTestCPUOnly reuses the JustTest class but has no CUDA
-// counterpart registered below.
-REGISTER_CPU_OPERATOR(JustTest, JustTest);
-REGISTER_CPU_OPERATOR(JustTestCPUOnly, JustTest);
-// Engine-specific CPU implementations of JustTest: FOO always throws on
-// construction, BAR and BAZ both map to the class whose type() is "BAR".
-REGISTER_CPU_OPERATOR_WITH_ENGINE(JustTest, FOO, JustTestAndNeverConstructs);
-REGISTER_CPU_OPERATOR_WITH_ENGINE(JustTest, BAR, JustTestAndDoesConstruct);
-REGISTER_CPU_OPERATOR_WITH_ENGINE(JustTest, BAZ, JustTestAndDoesConstruct);
-// The same OperatorBase-derived class is also registered for CUDA.
-REGISTER_CUDA_OPERATOR(JustTest, JustTest);
-REGISTER_CPU_OPERATOR(JustTestWithSomeOutput, JustTestWithSomeOutput);
-
-TEST(OperatorTest, DeviceTypeRegistryWorks) {
-  // The CPU device type must have exactly one entry in the registry.
-  const auto cpu_entries = gDeviceTypeRegistry()->count(CPU);
-  EXPECT_EQ(cpu_entries, 1);
-}
-
-TEST(OperatorTest, RegistryWorks) {
-  OperatorDef def;
-  Workspace workspace;
-  def.set_type("JustTest");
-  // With the default device option the op must be creatable.
-  auto created = CreateOperator(def, &workspace);
-  EXPECT_NE(nullptr, created.get());
-  // After introducing events, CUDA operator creation has to have CUDA compiled
-  // as it needs to instantiate an Event object with CUDAContext. Thus the CUDA
-  // half of this test only runs when a CUDA runtime is present.
-  if (HasCudaRuntime()) {
-    def.mutable_device_option()->set_device_type(PROTO_CUDA);
-    created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-  }
-}
-
-TEST(OperatorTest, RegistryWrongDevice) {
-  // JustTestCPUOnly is registered for CPU only, so requesting it on CUDA must
-  // throw. The type name has to match the registration exactly: with a
-  // misspelled name creation also throws, but only because the op type is
-  // unknown, which would not exercise the wrong-device path at all.
-  OperatorDef op_def;
-  Workspace ws;
-  op_def.set_type("JustTestCPUOnly");
-  op_def.mutable_device_option()->set_device_type(PROTO_CUDA);
-  // Note: we do not use ASSERT_THROW so that the error message gets logged
-  // for inspection.
-  try {
-    CreateOperator(op_def, &ws);
-    LOG(FATAL) << "No exception was thrown";
-  } catch (const std::exception& e) {
-    LOG(INFO) << "Exception " << e.what();
-  }
-}
-
-TEST(OperatorTest, ExceptionWorks) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("ThrowException");
-  auto throwing_op = CreateOperator(def, &workspace);
-  // Note: we do not do ASSERT_THROW in order to print out
-  // the error message for inspection.
-
-  // Synchronous entry point.
-  try {
-    throwing_op->Run();
-    // This should not happen - exception should throw above.
-    LOG(FATAL) << "This should not happen.";
-  } catch (const EnforceNotMet& enforce_error) {
-    LOG(INFO) << enforce_error.what();
-  }
-
-  // Asynchronous entry point.
-  try {
-    throwing_op->RunAsync();
-    // This should not happen - exception should throw above.
-    LOG(FATAL) << "This should not happen.";
-  } catch (const EnforceNotMet& enforce_error) {
-    LOG(INFO) << enforce_error.what();
-  }
-}
-
-TEST(OperatorTest, FallbackIfEngineDoesNotBuild) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-  // The FOO engine implementation throws in its constructor.
-  def.set_engine("FOO");
-  auto created = CreateOperator(def, &workspace);
-  EXPECT_NE(nullptr, created.get());
-  // Creation must have fallen back to the engine-less registration.
-  EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "base");
-}
-
-TEST(OperatorTest, MultipleEngineChoices) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-  // Comma-separated engine list: FOO cannot be constructed, BAR can.
-  def.set_engine("FOO,BAR");
-  auto created = CreateOperator(def, &workspace);
-  EXPECT_NE(nullptr, created.get());
-  EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "BAR");
-}
-
-TEST(OperatorTest, CannotUseUninitializedBlob) {
-  OperatorDef def;
-  def.set_name("JustTest0");
-  def.set_type("JustTest");
-  def.add_input("input");
-  def.add_output("output");
-  // The "input" blob is never created in this workspace, so creating the
-  // operator must fail.
-  Workspace workspace;
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  ASSERT_THROW(CreateOperator(def, &workspace), EnforceNotMet);
-}
-
-TEST(OperatorTest, TestParameterAccess) {
-  OperatorDef def;
-  Workspace workspace;
-  def.set_name("JustTest0");
-  def.set_type("JustTest");
-  def.add_input("input");
-  def.add_output("output");
-  // One argument of each flavour: single float, repeated int, string.
-  AddArgument<float>("arg0", 0.1, &def);
-  AddArgument<vector<int>>("arg1", vector<int>{1, 2}, &def);
-  AddArgument<string>("arg2", "argstring", &def);
-  EXPECT_NE(workspace.CreateBlob("input"), nullptr);
-  OperatorBase base_op(def, &workspace);
-
-  // Arguments that are present come back with their stored values.
-  EXPECT_FLOAT_EQ(base_op.GetSingleArgument<float>("arg0", 0.0), 0.1);
-  const vector<int> repeated = base_op.GetRepeatedArgument<int>("arg1");
-  EXPECT_EQ(repeated.size(), 2);
-  EXPECT_EQ(repeated[0], 1);
-  EXPECT_EQ(repeated[1], 2);
-  EXPECT_EQ(
-      base_op.GetSingleArgument<string>("arg2", "default"), "argstring");
-
-  // Missing repeated arguments yield the supplied default, or an empty vector
-  // when no default is given.
-  const auto with_default = base_op.GetRepeatedArgument<int>("arg3", {2, 3});
-  EXPECT_EQ(with_default.size(), 2);
-  EXPECT_EQ(with_default[0], 2);
-  EXPECT_EQ(with_default[1], 3);
-  const auto without_default = base_op.GetRepeatedArgument<int>("arg4");
-  EXPECT_EQ(without_default.size(), 0);
-}
-
-TEST(OperatorTest, CannotAccessParameterWithWrongType) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_name("JustTest0");
-  def.set_type("JustTest");
-  def.add_input("input");
-  def.add_output("output");
-  AddArgument<float>("arg0", 0.1f, &def);
-  EXPECT_NE(workspace.CreateBlob("input"), nullptr);
-  OperatorBase base_op(def, &workspace);
-  // Reading the float argument as a float works ...
-  EXPECT_FLOAT_EQ(base_op.GetSingleArgument<float>("arg0", 0.0), 0.1);
-  // ... while reading it as an int must throw.
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  ASSERT_THROW(base_op.GetSingleArgument<int>("arg0", 0), EnforceNotMet);
-}
-
-#if GTEST_HAS_DEATH_TEST
-TEST(OperatorDeathTest, DISABLED_CannotAccessRepeatedParameterWithWrongType) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_name("JustTest0");
-  def.set_type("JustTest");
-  def.add_input("input");
-  def.add_output("output");
-  AddArgument<vector<float>>("arg0", vector<float>{0.1f}, &def);
-  EXPECT_NE(workspace.CreateBlob("input"), nullptr);
-  OperatorBase base_op(def, &workspace);
-  // Reading the repeated float argument with the matching type works.
-  const auto floats = base_op.GetRepeatedArgument<float>("arg0");
-  EXPECT_EQ(floats.size(), 1);
-  EXPECT_FLOAT_EQ(floats[0], 0.1f);
-  // Reading it as repeated ints is expected to terminate the process.
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  EXPECT_DEATH(base_op.GetRepeatedArgument<int>("arg0"),
-               "Argument does not have the right field: expected ints");
-}
-#endif
-
-TEST(OperatorTest, TestDefaultValue) {
-  // An operator built from an empty def has no arguments, so the lookup must
-  // return the caller-supplied default.
-  Workspace workspace;
-  OperatorDef empty_def;
-  OperatorBase base_op(empty_def, &workspace);
-  EXPECT_FLOAT_EQ(
-      base_op.GetSingleArgument<float>("arg-nonexisting", 0.5f), 0.5f);
-}
-
-TEST(OperatorTest, TestSetUp) {
-  OperatorDef def;
-  def.set_name("JustTest0");
-  def.set_type("JustTest");
-  def.add_input("input");
-  def.add_output("output");
-  Workspace workspace;
-  EXPECT_NE(nullptr, workspace.CreateBlob("input"));
-  unique_ptr<OperatorBase> created(CreateOperator(def, &workspace));
-  EXPECT_NE(nullptr, created.get());
-  // Only "input" was created explicitly; "output" must exist after the
-  // operator has been created.
-  EXPECT_TRUE(workspace.HasBlob("output"));
-}
-
-TEST(OperatorTest, TestSetUpInputOutputCount) {
-  OperatorDef def;
-  def.set_name("JustTest0");
-  def.set_type("JustTest");
-  // First case: two inputs, one output.
-  def.add_input("input");
-  def.add_input("input2");
-  def.add_output("output");
-  Workspace workspace;
-  EXPECT_NE(nullptr, workspace.CreateBlob("input"));
-  EXPECT_NE(nullptr, workspace.CreateBlob("input2"));
-#ifndef CAFFE2_NO_OPERATOR_SCHEMA
-  // JustTest will only accept one single input.
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  ASSERT_ANY_THROW(CreateOperator(def, &workspace));
-#endif
-
-  // Second case: back to one input, but now with two outputs.
-  def.clear_input();
-  def.add_input("input");
-  def.add_output("output2");
-#ifndef CAFFE2_NO_OPERATOR_SCHEMA
-  // JustTest will only produce one single output.
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  ASSERT_ANY_THROW(CreateOperator(def, &workspace));
-#endif
-}
-
-TEST(OperatorTest, TestOutputValues) {
-  Workspace workspace;
-  // A single JustTestWithSomeOutput op writing to the blob "output".
-  OperatorDef def;
-  def.set_name("JustTest1");
-  def.set_type("JustTestWithSomeOutput");
-  def.add_output("output");
-  NetDef net_def;
-  net_def.set_name("NetForTest");
-  net_def.add_op()->CopyFrom(def);
-  unique_ptr<NetBase> net(CreateNet(net_def, &workspace));
-  EXPECT_TRUE(net->Run());
-  // The op stores the int 5 in its first output.
-  EXPECT_TRUE(workspace.HasBlob("output"));
-  EXPECT_EQ(workspace.GetBlob("output")->Get<int>(), 5);
-}
-
-// Builds a two-op net "NetForTest":
-//   JustTest0: input -> hidden
-//   JustTest1: hidden -> output
-NetDef GetNetDefForTest() {
-  NetDef net;
-  net.set_name("NetForTest");
-  const auto append_just_test =
-      [&net](const char* op_name, const char* in_blob, const char* out_blob) {
-        OperatorDef step;
-        step.set_name(op_name);
-        step.set_type("JustTest");
-        step.add_input(in_blob);
-        step.add_output(out_blob);
-        net.add_op()->CopyFrom(step);
-      };
-  append_just_test("JustTest0", "input", "hidden");
-  append_just_test("JustTest1", "hidden", "output");
-  return net;
-}
-
-TEST(NetTest, TestScaffoldingSimpleNet) {
-  NetDef simple_net_def = GetNetDefForTest();
-  simple_net_def.set_type("simple");
-  Workspace workspace;
-  EXPECT_NE(nullptr, workspace.CreateBlob("input"));
-  unique_ptr<NetBase> simple_net(CreateNet(simple_net_def, &workspace));
-  EXPECT_NE(nullptr, simple_net.get());
-  // Every blob named by the net must exist once the net has been created.
-  EXPECT_TRUE(workspace.HasBlob("input"));
-  EXPECT_TRUE(workspace.HasBlob("hidden"));
-  EXPECT_TRUE(workspace.HasBlob("output"));
-  EXPECT_TRUE(simple_net->Run());
-}
-
-TEST(NetTest, TestScaffoldingDAGNet) {
-  NetDef dag_net_def = GetNetDefForTest();
-  dag_net_def.set_type("dag");
-  dag_net_def.set_num_workers(1);
-  Workspace workspace;
-  EXPECT_NE(nullptr, workspace.CreateBlob("input"));
-  unique_ptr<NetBase> dag_net(CreateNet(dag_net_def, &workspace));
-  EXPECT_NE(nullptr, dag_net.get());
-  // Every blob named by the net must exist once the net has been created.
-  EXPECT_TRUE(workspace.HasBlob("input"));
-  EXPECT_TRUE(workspace.HasBlob("hidden"));
-  EXPECT_TRUE(workspace.HasBlob("output"));
-  EXPECT_TRUE(dag_net->Run());
-}
-
-// Default implementation registered for the FooGradient operator.
-class FooGradientOp : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  string type() override { return "FooGradient"; }
-};
-
-// Implementation registered for FooGradient under the DUMMY_ENGINE engine.
-class FooGradientDummyEngineOp : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  string type() override { return "FooGradientDummyEngine"; }
-};
-
-// Gradient maker for "Foo": emits one unnamed FooGradient op that takes the
-// gradient of output 0 and produces the gradient of input 0.
-class GetFooGradient : public GradientMakerBase {
-  using GradientMakerBase::GradientMakerBase;
-  vector<OperatorDef> GetGradientDefs() override {
-    const std::vector<string> grad_inputs{GO(0)};
-    const std::vector<string> grad_outputs{GI(0)};
-    vector<OperatorDef> grad_defs;
-    grad_defs.push_back(
-        CreateOperatorDef("FooGradient", "", grad_inputs, grad_outputs));
-    return grad_defs;
-  }
-};
-
-// FooGradient takes exactly one input and produces exactly one output.
-GRADIENT_OPERATOR_SCHEMA(FooGradient).NumInputs(1).NumOutputs(1);
-// Two CPU implementations: the default one and one under DUMMY_ENGINE.
-REGISTER_CPU_GRADIENT_OPERATOR(FooGradient, FooGradientOp)
-REGISTER_CPU_GRADIENT_OPERATOR_WITH_ENGINE(
-    FooGradient,
-    DUMMY_ENGINE,
-    FooGradientDummyEngineOp)
-// Gradients of the "Foo" operator are produced by GetFooGradient.
-REGISTER_GRADIENT(Foo, GetFooGradient);
-
-TEST(OperatorGradientRegistryTest, GradientSimple) {
-  // Forward op: Foo(in) -> out on CPU, with one int argument and DUMMY_ENGINE.
-  const Argument int_arg = MakeArgument<int>("arg", 1);
-  DeviceOption cpu_option;
-  cpu_option.set_device_type(PROTO_CPU);
-  const OperatorDef foo_def = CreateOperatorDef(
-      "Foo",
-      "",
-      std::vector<string>{"in"},
-      std::vector<string>{"out"},
-      std::vector<Argument>{int_arg},
-      cpu_option,
-      "DUMMY_ENGINE");
-  vector<GradientWrapper> output_grads(1);
-  output_grads[0].dense_ = "out_grad";
-  GradientOpsMeta grad_meta = GetGradientForOp(foo_def, output_grads);
-
-  // Check the names, input and output.
-  EXPECT_EQ(grad_meta.ops_.size(), 1);
-  const OperatorDef& grad_def = grad_meta.ops_[0];
-  EXPECT_EQ(grad_def.type(), "FooGradient");
-  EXPECT_EQ(grad_def.name(), "");
-  EXPECT_EQ(grad_def.input_size(), 1);
-  EXPECT_EQ(grad_def.output_size(), 1);
-  EXPECT_EQ(grad_def.input(0), "out_grad");
-  EXPECT_EQ(grad_def.output(0), "in_grad");
-
-  // Checks the engine, device option and arguments.
-  EXPECT_EQ(grad_def.engine(), "DUMMY_ENGINE");
-  EXPECT_EQ(grad_def.device_option().device_type(), PROTO_CPU);
-  EXPECT_EQ(grad_def.arg_size(), 1);
-  EXPECT_EQ(
-      grad_def.arg(0).SerializeAsString(),
-      MakeArgument<int>("arg", 1).SerializeAsString());
-
-  // Checks the gradient name for input.
-  EXPECT_EQ(grad_meta.g_input_.size(), 1);
-  EXPECT_TRUE(grad_meta.g_input_[0].IsDense());
-  EXPECT_EQ(grad_meta.g_input_[0].dense_, "in_grad");
-
-  // Instantiating the gradient def must pick the DUMMY_ENGINE implementation.
-  Workspace workspace;
-  EXPECT_NE(workspace.CreateBlob("out_grad"), nullptr);
-  auto created_grad = CreateOperator(grad_def, &workspace);
-  EXPECT_NE(nullptr, created_grad.get());
-  EXPECT_EQ(
-      static_cast<JustTest*>(created_grad.get())->type(),
-      "FooGradientDummyEngine");
-}
-
-TEST(EnginePrefTest, PerOpEnginePref) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-
-  // A per-op preference for BAR selects the BAR implementation.
-  SetPerOpEnginePref({{CPU, {{"JustTest", {"BAR"}}}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "BAR");
-  }
-  // clear
-  SetPerOpEnginePref({});
-
-  // Invalid operator type
-  // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto)
-  ASSERT_THROW(
-      SetPerOpEnginePref({{CPU, {{"NO_EXIST", {"BAR"}}}}}), EnforceNotMet);
-}
-
-TEST(EnginePrefTest, GlobalEnginePref) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-
-  // FOO cannot be constructed, so the second global choice (BAR) is used.
-  SetGlobalEnginePref({{CPU, {"FOO", "BAR"}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "BAR");
-  }
-  // clear
-  SetGlobalEnginePref({});
-
-  // With FOO as the only global choice the engine-less op is created.
-  SetGlobalEnginePref({{CPU, {"FOO"}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "base");
-  }
-  // clear
-  SetGlobalEnginePref({});
-
-  // Invalid device type
-  // This check is no longer necessary with the enum class
-  // ASSERT_THROW(SetGlobalEnginePref({{8888, {"FOO"}}}), EnforceNotMet);
-}
-
-TEST(EnginePrefTest, GlobalEnginePrefAndPerOpEnginePref) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-
-  // Per-op preference says BAR, global preference says BAZ.
-  SetPerOpEnginePref({{CPU, {{"JustTest", {"BAR"}}}}});
-  SetGlobalEnginePref({{CPU, {"BAZ"}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    // per op pref takes precedence
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "BAR");
-  }
-  // clear
-  SetPerOpEnginePref({});
-  SetGlobalEnginePref({});
-}
-
-TEST(EnginePrefTest, GlobalEnginePrefAndPerOpEnginePrefAndOpDef) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-  // The def itself asks for BAR while both preferences say BAZ.
-  def.set_engine("BAR");
-
-  SetPerOpEnginePref({{CPU, {{"JustTest", {"BAZ"}}}}});
-  SetGlobalEnginePref({{CPU, {"BAZ"}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    // operator_def takes precedence
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "BAR");
-  }
-  // clear
-  SetPerOpEnginePref({});
-  SetGlobalEnginePref({});
-}
-
-TEST(EnginePrefTest, SetOpEnginePref) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-
-  // Start with a per-op preference of BAZ, then replace the preference for
-  // JustTest alone with BAR.
-  SetPerOpEnginePref({{CPU, {{"JustTest", {"BAZ"}}}}});
-  SetOpEnginePref("JustTest", {{CPU, {"BAR"}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    // the later SetOpEnginePref call wins over the earlier per-op pref
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "BAR");
-  }
-  // clear
-  SetPerOpEnginePref({});
-  SetGlobalEnginePref({});
-}
-
-TEST(EnginePrefTest, SetDefaultEngine) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTest");
-
-  // Per-op preference says DEFAULT, global preference says BAR.
-  SetPerOpEnginePref({{CPU, {{"JustTest", {"DEFAULT"}}}}});
-  SetGlobalEnginePref({{CPU, {"BAR"}}});
-  {
-    const auto created = CreateOperator(def, &workspace);
-    EXPECT_NE(nullptr, created.get());
-    // the per-op DEFAULT pref wins over the global BAR pref, so the
-    // engine-less implementation is created
-    EXPECT_EQ(static_cast<JustTest*>(created.get())->type(), "base");
-  }
-  // clear
-  SetPerOpEnginePref({});
-  SetGlobalEnginePref({});
-}
-
-// JustTest variant paired with a schema that declares a required argument.
-class JustTestWithRequiredArg : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  bool Run(int /*stream_id*/) override { return true; }
-
-  string type() override { return "JustTestWithRequiredArg"; }
-};
-
-REGISTER_CPU_OPERATOR(JustTestWithRequiredArg, JustTestWithRequiredArg);
-// 0-1 inputs, 0-1 outputs, and "test_arg" declared with the required flag set
-// (third Arg() parameter).
-OPERATOR_SCHEMA(JustTestWithRequiredArg)
-    .NumInputs(0, 1)
-    .NumOutputs(0, 1)
-    .Arg("test_arg", "this arg is required", true);
-
-TEST(RequiredArg, Basic) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTestWithRequiredArg");
-
-  // Without "test_arg" operator creation must throw.
-  try {
-    CreateOperator(def, &workspace);
-    LOG(FATAL) << "No exception was thrown";
-  } catch (const std::exception& creation_error) {
-    LOG(INFO) << "Exception thrown (expected): " << creation_error.what();
-  }
-
-  // Once "test_arg" is supplied the operator is created normally.
-  def.add_arg()->CopyFrom(MakeArgument("test_arg", 1));
-  const auto created = CreateOperator(def, &workspace);
-  EXPECT_NE(nullptr, created.get());
-  EXPECT_EQ(
-      static_cast<JustTest*>(created.get())->type(),
-      "JustTestWithRequiredArg");
-}
-
-// JustTest variant paired with a schema that uses ArgIsTest().
-class JustTestWithStandardIsTestArg : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  bool Run(int /*stream_id*/) override { return true; }
-
-  string type() override { return "JustTestWithStandardIsTestArg"; }
-};
-
-REGISTER_CPU_OPERATOR(
-    JustTestWithStandardIsTestArg,
-    JustTestWithStandardIsTestArg);
-// 0-1 inputs, 0-1 outputs; the is_test argument is declared through
-// ArgIsTest(), which the IsTestArg.standard test expects to make it mandatory.
-OPERATOR_SCHEMA(JustTestWithStandardIsTestArg)
-    .NumInputs(0, 1)
-    .NumOutputs(0, 1)
-    .ArgIsTest("this is_test arg is required");
-
-TEST(IsTestArg, standard) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTestWithStandardIsTestArg");
-
-  // Without the is_test argument operator creation must throw.
-  try {
-    CreateOperator(def, &workspace);
-    LOG(FATAL) << "No exception was thrown";
-  } catch (const std::exception& creation_error) {
-    LOG(INFO) << "Exception thrown (expected): " << creation_error.what();
-  }
-
-  // Once is_test is supplied the operator is created normally.
-  def.add_arg()->CopyFrom(MakeArgument(OpSchema::Arg_IsTest, 1));
-  const auto created = CreateOperator(def, &workspace);
-  EXPECT_NE(nullptr, created.get());
-  EXPECT_EQ(
-      static_cast<JustTest*>(created.get())->type(),
-      "JustTestWithStandardIsTestArg");
-}
-
-// JustTest variant paired with a schema that declares is_test via plain Arg().
-class JustTestWithNonStandardIsTestArg : public JustTest {
- public:
-  using JustTest::JustTest;
-
-  bool Run(int /*stream_id*/) override { return true; }
-
-  string type() override { return "JustTestWithNonStandardIsTestArg"; }
-};
-
-REGISTER_CPU_OPERATOR(
-    JustTestWithNonStandardIsTestArg,
-    JustTestWithNonStandardIsTestArg);
-// 0-1 inputs, 0-1 outputs; is_test is declared with plain Arg() and no
-// required flag, so the op can be created without it.
-OPERATOR_SCHEMA(JustTestWithNonStandardIsTestArg)
-    .NumInputs(0, 1)
-    .NumOutputs(0, 1)
-    .Arg(OpSchema::Arg_IsTest, "this is_test arg is not required");
-
-TEST(IsTestArg, non_standard) {
-  Workspace workspace;
-  OperatorDef def;
-  def.set_type("JustTestWithNonStandardIsTestArg");
-
-  // No is_test argument is supplied, yet creation must succeed.
-  const auto created = CreateOperator(def, &workspace);
-  EXPECT_NE(nullptr, created.get());
-  EXPECT_EQ(
-      static_cast<JustTest*>(created.get())->type(),
-      "JustTestWithNonStandardIsTestArg");
-}
-
-}  // namespace caffe2
--- a/caffe2/core/parallel_net_test.cc
+++ b/caffe2/core/parallel_net_test.cc
@ -1,322 +0,0 @@
-#include <chrono> // NOLINT
-#include <thread> // NOLINT
-
-#include <gtest/gtest.h>
-#include "caffe2/core/net.h"
-#include "caffe2/core/operator.h"
-
-namespace caffe2 {
-
-// Timing assertions accept the expected duration +- kTimeThreshold
-// milliseconds; Windows gets a wider margin than other platforms.
-#ifdef _WIN32
-const int kTimeThreshold = 50;
-#else
-const int kTimeThreshold = 40;
-#endif
-
-// SleepOp sleeps for the number of milliseconds given by its "ms" argument
-// (default 1000). We allow arbitrary inputs and at most one output so that we
-// can test scaffolding of networks. If an output is present, it is filled
-// with a vector<int64_t> of two elements: start time and end time, as tick
-// counts of std::chrono::high_resolution_clock.
-class SleepOp final : public Operator<CPUContext> {
- public:
-  SleepOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws),
-        ms_(OperatorBase::GetSingleArgument<int>("ms", 1000)) {
-    TORCH_DCHECK_GT(ms_, 0);
-    TORCH_DCHECK_LT(ms_, 3600 * 1000) << "Really? This long?";
-  }
-
-  bool RunOnDevice() override {
-    using Clock = std::chrono::high_resolution_clock;
-    const auto began = Clock::now();
-    std::this_thread::sleep_for(std::chrono::milliseconds(ms_));
-    const auto finished = Clock::now();
-    if (OperatorBase::OutputSize()) {
-      vector<int64_t>& stamps = *OperatorBase::Output<vector<int64_t>>(0);
-      stamps.resize(2);
-      stamps[0] = began.time_since_epoch().count();
-      stamps[1] = finished.time_since_epoch().count();
-    }
-    return true;
-  }
-
- private:
-  // Sleep duration in milliseconds.
-  int ms_;
-};
-
-// Sleep accepts any number of inputs and at most one output.
-OPERATOR_SCHEMA(Sleep).NumInputs(0, INT_MAX).NumOutputs(0, 1);
-
-// The same CPUContext-based SleepOp class is registered for CPU and CUDA.
-REGISTER_CPU_OPERATOR(Sleep, SleepOp);
-REGISTER_CUDA_OPERATOR(Sleep, SleepOp);
-
-// Text-format NetDef with three Sleep ops: sleep1 (100 ms, no inputs),
-// sleep2 (100 ms, reads sleep1) and sleep3 (150 ms, no inputs). The adjacent
-// literals below concatenate into one string without newlines.
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-const char kSleepNetDefString[] =
-    "  name: \"sleepnet\""
-    "  type: \"dag\""
-    "  num_workers: 2"
-    "  op {"
-    "    output: \"sleep1\""
-    "    name: \"sleep1\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    input: \"sleep1\""
-    "    output: \"sleep2\""
-    "    name: \"sleep2\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    output: \"sleep3\""
-    "    name: \"sleep3\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 150"
-    "    }"
-    "  }";
-
-namespace {
-// Parses `net_def_str` as a text-format NetDef, overrides its executor type
-// with `type`, and returns the wall-clock duration of a single run in
-// milliseconds. Any parse, creation or run failure trips CAFFE_ENFORCE.
-int RunNetAndGetDuration(const string& net_def_str, const string& type) {
-  NetDef net_def;
-  CAFFE_ENFORCE(TextFormat::ParseFromString(net_def_str, &net_def));
-  net_def.set_type(type);
-  Workspace ws;
-  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
-  CAFFE_ENFORCE(net.get() != nullptr);
-  // Run once to kick in potential initialization (can be slower)
-  CAFFE_ENFORCE(net->Run());
-  // Now run and time it. steady_clock is monotonic, so the measured interval
-  // cannot be distorted by system clock adjustments during the run.
-  const auto start_time = std::chrono::steady_clock::now();
-  CAFFE_ENFORCE(net->Run());
-  const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
-      std::chrono::steady_clock::now() - start_time);
-  // The nets timed here run for well under a second, so int is wide enough.
-  return static_cast<int>(duration.count());
-}
-} // namespace
-
-TEST(DAGNetTest, TestDAGNetTiming) {
-  // sleep3 (150 ms) can overlap the sleep1 -> sleep2 chain (100 + 100 ms).
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefString), "dag");
-  EXPECT_NEAR(elapsed_ms, 200, kTimeThreshold);
-}
-
-// Sanity check with the sequential executor: the three ops run one after the
-// other, so the net should take about 0.35 seconds (100 + 100 + 150 ms).
-TEST(SimpleNetTest, TestSimpleNetTiming) {
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefString), "simple");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-// This network has two operators reading the same blob at the same time:
-// sleep2 (100 ms) and sleep3 (150 ms) both take sleep1's output (produced
-// after 100 ms) as input. This should not change anything and the DAG should
-// still make sleep2 and sleep3 run in parallel.
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-const char kSleepNetDefStringReadAfterRead[] =
-    "  name: \"sleepnet\""
-    "  type: \"dag\""
-    "  num_workers: 2"
-    "  op {"
-    "    output: \"sleep1\""
-    "    name: \"sleep1\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    input: \"sleep1\""
-    "    output: \"sleep2\""
-    "    name: \"sleep2\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    input: \"sleep1\""
-    "    output: \"sleep3\""
-    "    name: \"sleep3\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 150"
-    "    }"
-    "  }";
-
-TEST(DAGNetTest, TestDAGNetTimingReadAfterRead) {
-  // sleep1 (100 ms) first, then sleep2 (100 ms) and sleep3 (150 ms) overlap.
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringReadAfterRead), "dag");
-  EXPECT_NEAR(elapsed_ms, 250, kTimeThreshold);
-}
-
-// Sanity check with the sequential executor: the three ops run one after the
-// other, so the net should take about 0.35 seconds (100 + 100 + 150 ms).
-TEST(SimpleNetTest, TestSimpleNetTimingReadAfterRead) {
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringReadAfterRead), "simple");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-// This network has two operators writing out the sleep2 blob: sleep2 (100 ms,
-// reads sleep1) and sleep2-again (150 ms, no inputs). As a result, the
-// operator sleep2-again creates a write after write dependency and the whole
-// process should be sequential.
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-const char kSleepNetDefStringWriteAfterWrite[] =
-    "  name: \"sleepnet\""
-    "  type: \"dag\""
-    "  num_workers: 2"
-    "  op {"
-    "    output: \"sleep1\""
-    "    name: \"sleep1\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    input: \"sleep1\""
-    "    output: \"sleep2\""
-    "    name: \"sleep2\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    output: \"sleep2\""
-    "    name: \"sleep2-again\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 150"
-    "    }"
-    "  }";
-
-TEST(DAGNetTest, TestDAGNetTimingWriteAfterWrite) {
-  // The write-after-write dependency serializes all three ops.
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringWriteAfterWrite), "dag");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-TEST(SimpleNetTest, TestSimpleNetTimingWriteAfterWrite) {
-  // Sequential executor: 100 + 100 + 150 ms.
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringWriteAfterWrite), "simple");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-// This network has an operator (sleep1-again, 150 ms) writing to sleep1 while
-// another operator (sleep2, 100 ms) is reading it. As a result, the operator
-// sleep1-again creates a write after read dependency and the whole process
-// should be sequential.
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-const char kSleepNetDefStringWriteAfterRead[] =
-    "  name: \"sleepnet\""
-    "  type: \"dag\""
-    "  num_workers: 2"
-    "  op {"
-    "    output: \"sleep1\""
-    "    name: \"sleep1\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    input: \"sleep1\""
-    "    output: \"sleep2\""
-    "    name: \"sleep2\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 100"
-    "    }"
-    "  }"
-    "  op {"
-    "    output: \"sleep1\""
-    "    name: \"sleep1-again\""
-    "    type: \"Sleep\""
-    "    arg {"
-    "      name: \"ms\""
-    "      i: 150"
-    "    }"
-    "  }";
-
-TEST(DAGNetTest, TestDAGNetTimingWriteAfterRead) {
-  // The write-after-read dependency serializes all three ops.
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringWriteAfterRead), "dag");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-TEST(SimpleNetTest, TestSimpleNetTimingWriteAfterRead) {
-  // Sequential executor: 100 + 100 + 150 ms.
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringWriteAfterRead), "simple");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-// This network has an operator (sleep1-again, 150 ms) writing to sleep1 while
-// another operator (sleep2, 100 ms) has a control dependency on it through
-// control_input. As a result, the operator sleep1-again creates a write after
-// read dependency and the whole process should be sequential.
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-const char kSleepNetDefStringControlDependency[] = R"DOC(
-  name: "sleepnet"
-  type: "dag"
-  num_workers: 2
-  op {
-    output: "sleep1"
-    name: "sleep1"
-    type: "Sleep"
-    arg {
-      name: "ms"
-      i: 100
-    }
-  }
-  op {
-    control_input: "sleep1"
-    output: "sleep2"
-    name: "sleep2"
-    type: "Sleep"
-    arg {
-      name: "ms"
-      i: 100
-    }
-  }
-  op {
-    output: "sleep1"
-    name: "sleep1-again"
-    type: "Sleep"
-    arg {
-      name: "ms"
-      i: 150
-    }
-  }
-)DOC";
-
-TEST(DAGNetTest, TestDAGNetTimingControlDependency) {
-  // The control dependency serializes all three ops.
-  const int elapsed_ms =
-      RunNetAndGetDuration(string(kSleepNetDefStringControlDependency), "dag");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-TEST(SimpleNetTest, TestSimpleNetTimingControlDependency) {
-  // Sequential executor: 100 + 100 + 150 ms.
-  const int elapsed_ms = RunNetAndGetDuration(
-      string(kSleepNetDefStringControlDependency), "simple");
-  EXPECT_NEAR(elapsed_ms, 350, kTimeThreshold);
-}
-
-} // namespace caffe2
--- a/caffe2/core/plan_executor_test.cc
+++ b/caffe2/core/plan_executor_test.cc
@ -1,414 +0,0 @@
-#ifndef ANDROID
-
-#include <gtest/gtest.h>
-#include "caffe2/core/init.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/core/plan_executor.h"
-
-namespace caffe2 {
-
-TEST(PlanExecutorTest, EmptyPlan) {
-  PlanDef plan_def;
-  Workspace ws;
-  EXPECT_TRUE(ws.RunPlan(plan_def));
-}
-
-namespace {
-static std::atomic<int> cancelCount{0};
-static std::atomic<bool> stuckRun{false};
-} // namespace
-
-class StuckBlockingOp final : public Operator<CPUContext> {
- public:
-  StuckBlockingOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    // StuckBlockingOp runs and notifies ErrorOp.
-    stuckRun = true;
-
-    while (!cancelled_) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(10));
-    }
-
-    return true;
-  }
-
-  void Cancel() override {
-    LOG(INFO) << "cancelled StuckBlockingOp.";
-    cancelCount += 1;
-    cancelled_ = true;
-  }
-
- private:
-  std::atomic<bool> cancelled_{false};
-};
-
-REGISTER_CPU_OPERATOR(StuckBlocking, StuckBlockingOp);
-OPERATOR_SCHEMA(StuckBlocking).NumInputs(0).NumOutputs(0);
-
-class NoopOp final : public Operator<CPUContext> {
- public:
-  NoopOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    // notify Error op we've ran.
-    stuckRun = true;
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(Noop, NoopOp);
-OPERATOR_SCHEMA(Noop).NumInputs(0).NumOutputs(0);
-
-
-class StuckAsyncOp final : public Operator<CPUContext> {
- public:
-  StuckAsyncOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    // notify Error op we've ran.
-    stuckRun = true;
-    // explicitly don't call SetFinished so this gets stuck
-    return true;
-  }
-
-  void CancelAsyncCallback() override {
-    LOG(INFO) << "cancelled";
-    cancelCount += 1;
-  }
-
-  bool HasAsyncPart() const override {
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(StuckAsync, StuckAsyncOp);
-OPERATOR_SCHEMA(StuckAsync).NumInputs(0).NumOutputs(0);
-
-class TestError : public std::exception {
-  const char* what() const noexcept override {
-    return "test error";
-  }
-};
-
-class ErrorOp final : public Operator<CPUContext> {
- public:
-  ErrorOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    // Wait for StuckAsyncOp or StuckBlockingOp to run first.
-    while (!stuckRun) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(10));
-    }
-    throw TestError();
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(Error, ErrorOp);
-OPERATOR_SCHEMA(Error).NumInputs(0).NumOutputs(0);
-
-static std::atomic<int> blockingErrorRuns{0};
-class BlockingErrorOp final : public Operator<CPUContext> {
- public:
-  BlockingErrorOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CPUContext>(operator_def, ws) {}
-
-  bool RunOnDevice() override {
-    // First n op executions should block and then start throwing errors.
-    if (blockingErrorRuns.fetch_sub(1) >= 1) {
-      LOG(INFO) << "blocking";
-      while (true) {
-        std::this_thread::sleep_for(std::chrono::hours(10));
-      }
-    } else {
-      LOG(INFO) << "throwing";
-      throw TestError();
-    }
-  }
-};
-
-REGISTER_CPU_OPERATOR(BlockingError, BlockingErrorOp);
-OPERATOR_SCHEMA(BlockingError).NumInputs(0).NumOutputs(0);
-
-PlanDef parallelErrorPlan() {
-  PlanDef plan_def;
-
-  auto* stuck_net = plan_def.add_network();
-  stuck_net->set_name("stuck_net");
-  stuck_net->set_type("async_scheduling");
-  {
-    auto* op = stuck_net->add_op();
-    op->set_type("StuckAsync");
-  }
-
-  auto* error_net = plan_def.add_network();
-  error_net->set_name("error_net");
-  error_net->set_type("async_scheduling");
-  {
-    auto op = error_net->add_op();
-    op->set_type("Error");
-  }
-
-  auto* execution_step = plan_def.add_execution_step();
-  execution_step->set_concurrent_substeps(true);
-  {
-    auto* substep = execution_step->add_substep();
-    substep->add_network(stuck_net->name());
-  }
-  {
-    auto* substep = execution_step->add_substep();
-    substep->add_network(error_net->name());
-  }
-
-  return plan_def;
-}
-
-PlanDef parallelErrorPlanWithCancellableStuckNet() {
-  // Set a plan with two nets: one stuck net with blocking operator that never
-  // returns; one error net with error op that throws.
-  PlanDef plan_def;
-
-  auto* stuck_blocking_net = plan_def.add_network();
-  stuck_blocking_net->set_name("stuck_blocking_net");
-  {
-    auto* op = stuck_blocking_net->add_op();
-    op->set_type("StuckBlocking");
-  }
-
-  auto* error_net = plan_def.add_network();
-  error_net->set_name("error_net");
-  {
-    auto* op = error_net->add_op();
-    op->set_type("Error");
-  }
-
-  auto* execution_step = plan_def.add_execution_step();
-  execution_step->set_concurrent_substeps(true);
-  {
-    auto* substep = execution_step->add_substep();
-    substep->add_network(stuck_blocking_net->name());
-  }
-  {
-    auto* substep = execution_step->add_substep();
-    substep->add_network(error_net->name());
-  }
-
-  return plan_def;
-}
-
-PlanDef reporterErrorPlanWithCancellableStuckNet() {
-  // Set a plan with a concurrent net and a reporter net: one stuck net with
-  // blocking operator that never returns; one reporter net with error op
-  // that throws.
-  PlanDef plan_def;
-
-  auto* stuck_blocking_net = plan_def.add_network();
-  stuck_blocking_net->set_name("stuck_blocking_net");
-  {
-    auto* op = stuck_blocking_net->add_op();
-    op->set_type("StuckBlocking");
-  }
-
-  auto* error_net = plan_def.add_network();
-  error_net->set_name("error_net");
-  {
-    auto* op = error_net->add_op();
-    op->set_type("Error");
-  }
-
-  auto* execution_step = plan_def.add_execution_step();
-  execution_step->set_concurrent_substeps(true);
-  {
-    auto* substep = execution_step->add_substep();
-    substep->add_network(stuck_blocking_net->name());
-  }
-  {
-    auto* substep = execution_step->add_substep();
-    substep->set_run_every_ms(1);
-    substep->add_network(error_net->name());
-  }
-
-  return plan_def;
-}
-
-struct HandleExecutorThreadExceptionsGuard {
-  HandleExecutorThreadExceptionsGuard(int timeout = 60) {
-    globalInit({
-        "caffe2",
-        "--caffe2_handle_executor_threads_exceptions=1",
-        "--caffe2_plan_executor_exception_timeout=" +
-            caffe2::to_string(timeout),
-    });
-  }
-
-  ~HandleExecutorThreadExceptionsGuard() {
-    globalInit({
-        "caffe2",
-    });
-  }
-
-  HandleExecutorThreadExceptionsGuard(
-      const HandleExecutorThreadExceptionsGuard&) = delete;
-  void operator=(const HandleExecutorThreadExceptionsGuard&) = delete;
-
- private:
-  void globalInit(std::vector<std::string> args) {
-    std::vector<char*> args_ptrs;
-    for (auto& arg : args) {
-      // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast,performance-inefficient-vector-operation)
-      args_ptrs.push_back(const_cast<char*>(arg.data()));
-    }
-    char** new_argv = args_ptrs.data();
-    int new_argc = args.size();
-    CAFFE_ENFORCE(GlobalInit(&new_argc, &new_argv));
-  }
-};
-
-TEST(PlanExecutorTest, ErrorAsyncPlan) {
-  HandleExecutorThreadExceptionsGuard guard;
-
-  cancelCount = 0;
-  PlanDef plan_def = parallelErrorPlan();
-  Workspace ws;
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  ASSERT_THROW(ws.RunPlan(plan_def), TestError);
-  ASSERT_EQ(cancelCount, 1);
-}
-
-// death tests not supported on mobile
-#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
-TEST(PlanExecutorTest, BlockingErrorPlan) {
-  // TSAN doesn't play nicely with death tests
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-  return;
-#endif
-#endif
-
-  testing::GTEST_FLAG(death_test_style) = "threadsafe";
-
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  ASSERT_DEATH(
-      [] {
-        HandleExecutorThreadExceptionsGuard guard(/*timeout=*/1);
-
-        PlanDef plan_def;
-
-        std::string plan_def_template = R"DOC(
-          network {
-            name: "net"
-            op {
-              type: "BlockingError"
-            }
-          }
-          execution_step {
-            num_concurrent_instances: 2
-            substep {
-              network: "net"
-            }
-          }
-        )DOC";
-
-        CAFFE_ENFORCE(
-            TextFormat::ParseFromString(plan_def_template, &plan_def));
-        Workspace ws;
-        blockingErrorRuns = 1;
-        ws.RunPlan(plan_def);
-        FAIL() << "shouldn't have reached this point";
-      }(),
-      "failed to stop concurrent workers after exception: test error");
-}
-#endif
-
-TEST(PlanExecutorTest, ErrorPlanWithCancellableStuckNet) {
-  HandleExecutorThreadExceptionsGuard guard;
-
-  cancelCount = 0;
-  PlanDef plan_def = parallelErrorPlanWithCancellableStuckNet();
-  Workspace ws;
-
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  ASSERT_THROW(ws.RunPlan(plan_def), TestError);
-  ASSERT_EQ(cancelCount, 1);
-}
-
-TEST(PlanExecutorTest, ReporterErrorPlanWithCancellableStuckNet) {
-  HandleExecutorThreadExceptionsGuard guard;
-
-  cancelCount = 0;
-  PlanDef plan_def = reporterErrorPlanWithCancellableStuckNet();
-  Workspace ws;
-
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  ASSERT_THROW(ws.RunPlan(plan_def), TestError);
-  ASSERT_EQ(cancelCount, 1);
-}
-
-PlanDef shouldStopWithCancelPlan() {
-  // Set a plan with a looping net with should_stop_blob set and a concurrent
-  // net that throws an error. The error should cause should_stop to return
-  // false and end the concurrent net.
-  PlanDef plan_def;
-
-  auto* should_stop_net = plan_def.add_network();
-  {
-    auto* op = should_stop_net->add_op();
-    op->set_type("Noop");
-  }
-  should_stop_net->set_name("should_stop_net");
-  should_stop_net->set_type("async_scheduling");
-
-  auto* error_net = plan_def.add_network();
-  error_net->set_name("error_net");
-  {
-    auto* op = error_net->add_op();
-    op->set_type("Error");
-  }
-
-  auto* execution_step = plan_def.add_execution_step();
-  execution_step->set_concurrent_substeps(true);
-  {
-    auto* substep = execution_step->add_substep();
-  execution_step->set_concurrent_substeps(true);
-    substep->set_name("concurrent_should_stop");
-    substep->set_should_stop_blob("should_stop_blob");
-    auto* substep2 = substep->add_substep();
-    substep2->set_name("should_stop_net");
-    substep2->add_network(should_stop_net->name());
-    substep2->set_num_iter(10);
-  }
-  {
-    auto* substep = execution_step->add_substep();
-    substep->set_name("error_step");
-    substep->add_network(error_net->name());
-  }
-
-  return plan_def;
-}
-
-TEST(PlanExecutorTest, ShouldStopWithCancel) {
-  HandleExecutorThreadExceptionsGuard guard;
-
-  stuckRun = false;
-  PlanDef plan_def = shouldStopWithCancelPlan();
-  Workspace ws;
-
-  Blob* blob = ws.CreateBlob("should_stop_blob");
-  Tensor* tensor = BlobGetMutableTensor(blob, CPU);
-  const vector<int64_t>& shape{1};
-  tensor->Resize(shape);
-  tensor->mutable_data<bool>()[0] = false;
-
-  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
-  ASSERT_THROW(ws.RunPlan(plan_def), TestError);
-  ASSERT_TRUE(stuckRun);
-}
-
-} // namespace caffe2
-
-#endif
--- a/caffe2/core/serialization_test.cc
+++ b/caffe2/core/serialization_test.cc
@ -1,101 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <c10/util/Flags.h>
-#include <c10/util/string_view.h>
-#include "caffe2/core/blob.h"
-#include "caffe2/core/blob_serialization.h"
-
-// NOLINTNEXTLINE: cppcoreguidelines-avoid-c-arrays
-C10_DEFINE_bool(
-    caffe2_test_generate_unknown_dtype_blob,
-    false,
-    "Recompute and log the serialized blob data for the "
-    "TensorSerialization.TestUnknownDType test");
-
-using namespace caffe2;
-
-namespace {
-
-// This data was computed by serializing a 10-element int32_t tensor,
-// but with the data_type field set to 4567.  This allows us to test the
-// behavior of the code when deserializing data from a future version of the
-// code that has new data types that our code does not understand.
-constexpr c10::string_view kFutureDtypeBlob(
-    "\x0a\x09\x74\x65\x73\x74\x5f\x62\x6c\x6f\x62\x12\x06\x54\x65\x6e"
-    "\x73\x6f\x72\x1a\x28\x08\x0a\x08\x01\x10\xd7\x23\x22\x0a\x00\x01"
-    "\x02\x03\x04\x05\x06\x07\x08\x09\x3a\x09\x74\x65\x73\x74\x5f\x62"
-    "\x6c\x6f\x62\x42\x02\x08\x00\x5a\x04\x08\x00\x10\x0a",
-    61);
-// The same tensor with the data_type actually set to TensorProto_DataType_INT32
-constexpr c10::string_view kInt32DtypeBlob(
-    "\x0a\x09\x74\x65\x73\x74\x5f\x62\x6c\x6f\x62\x12\x06\x54\x65\x6e"
-    "\x73\x6f\x72\x1a\x27\x08\x0a\x08\x01\x10\x02\x22\x0a\x00\x01\x02"
-    "\x03\x04\x05\x06\x07\x08\x09\x3a\x09\x74\x65\x73\x74\x5f\x62\x6c"
-    "\x6f\x62\x42\x02\x08\x00\x5a\x04\x08\x00\x10\x0a",
-    60);
-
-void logBlob(c10::string_view data) {
-  constexpr size_t kBytesPerLine = 16;
-  constexpr size_t kCharsPerEncodedByte = 4;
-  std::vector<char> hexStr;
-  hexStr.resize((kBytesPerLine * kCharsPerEncodedByte) + 1);
-  hexStr[kBytesPerLine * kCharsPerEncodedByte] = '\0';
-  size_t lineIdx = 0;
-  for (char c : data) {
-    snprintf(
-        hexStr.data() + (kCharsPerEncodedByte * lineIdx),
-        kCharsPerEncodedByte + 1,
-        "\\x%02x",
-        static_cast<unsigned int>(c));
-    ++lineIdx;
-    if (lineIdx >= kBytesPerLine) {
-      LOG(INFO) << "    \"" << hexStr.data() << "\"";
-      lineIdx = 0;
-    }
-  }
-  if (lineIdx > 0) {
-    hexStr[lineIdx * kCharsPerEncodedByte] = '\0';
-    LOG(INFO) << "    \"" << hexStr.data() << "\"";
-  }
-}
-
-} // namespace
-
-TEST(TensorSerialization, TestUnknownDType) {
-  // This code was used to generate the blob data listed above.
-  constexpr size_t kTestTensorSize = 10;
-  if (FLAGS_caffe2_test_generate_unknown_dtype_blob) {
-    Blob blob;
-    auto* blobTensor = BlobGetMutableTensor(&blob, CPU);
-    blobTensor->Resize(kTestTensorSize, 1);
-    auto *tensorData = blobTensor->mutable_data<int32_t>();
-    for (unsigned n = 0; n < kTestTensorSize; ++n) {
-      tensorData[n] = n;
-    }
-    auto data = SerializeBlob(blob, "test_blob");
-    LOG(INFO) << "test blob: size=" << data.size();
-    logBlob(data);
-  }
-
-  // Test deserializing the normal INT32 data,
-  // just to santity check that deserialization works
-  Blob i32Blob;
-  DeserializeBlob(std::string(kInt32DtypeBlob), &i32Blob);
-  const auto& tensor = BlobGetTensor(i32Blob, c10::DeviceType::CPU);
-  EXPECT_EQ(kTestTensorSize, tensor.numel());
-  EXPECT_EQ(TypeMeta::Make<int32_t>(), tensor.dtype());
-  const auto* tensor_data = tensor.template data<int32_t>();
-  for (unsigned i = 0; i < kTestTensorSize; ++i) {
-    EXPECT_EQ(static_cast<float>(i), tensor_data[i]);
-  }
-
-  // Now test deserializing our blob with an unknown data type
-  Blob futureDtypeBlob;
-  try {
-    DeserializeBlob(std::string(kFutureDtypeBlob), &futureDtypeBlob);
-    FAIL() << "DeserializeBlob() should have failed";
-  } catch (const std::exception& ex) {
-    EXPECT_STREQ(
-        "Cannot deserialize tensor: unrecognized data type", ex.what());
-  }
-}
--- a/caffe2/core/stats_test.cc
+++ b/caffe2/core/stats_test.cc
@ -1,151 +0,0 @@
-#include <chrono>
-#include <iostream>
-#include <thread>
-
-#include "caffe2/core/stats.h"
-#include <gtest/gtest.h>
-
-namespace caffe2 {
-namespace {
-
-struct MyCaffeClass {
-  explicit MyCaffeClass(const std::string& name) : stats_(name) {}
-
-  void tryRun(int) {}
-
-  void run(int numRuns) {
-    try {
-      CAFFE_EVENT(stats_, num_runs, numRuns);
-      tryRun(numRuns);
-      CAFFE_EVENT(stats_, num_successes);
-    } catch (std::exception& e) {
-      CAFFE_EVENT(stats_, num_failures, 1, "arg_to_usdt", e.what());
-    }
-    CAFFE_EVENT(stats_, usdt_only, 1, "arg_to_usdt");
-  }
-
- private:
-  struct MyStats {
-    // NOLINTNEXTLINE(modernize-pass-by-value)
-    CAFFE_STAT_CTOR(MyStats);
-    CAFFE_EXPORTED_STAT(num_runs);
-    CAFFE_EXPORTED_STAT(num_successes);
-    CAFFE_EXPORTED_STAT(num_failures);
-    CAFFE_STAT(usdt_only);
-  } stats_;
-};
-
-ExportedStatMap filterMap(
-    const ExportedStatMap& map,
-    const ExportedStatMap& keys) {
-  ExportedStatMap filtered;
-  for (const auto& kv : map) {
-    if (keys.count(kv.first) > 0) {
-      filtered.insert(kv);
-    }
-  }
-  return filtered;
-}
-
-#define EXPECT_SUBSET(map, sub) EXPECT_EQ(filterMap((map), (sub)), (sub))
-
-TEST(StatsTest, StatsTestClass) {
-  MyCaffeClass a("first");
-  MyCaffeClass b("second");
-  for (int i = 0; i < 10; ++i) {
-    a.run(10);
-    b.run(5);
-  }
-  EXPECT_SUBSET(
-      ExportedStatMap({
-          {"first/num_runs", 100},
-          {"first/num_successes", 10},
-          {"first/num_failures", 0},
-          {"second/num_runs", 50},
-          {"second/num_successes", 10},
-          {"second/num_failures", 0},
-      }),
-      toMap(StatRegistry::get().publish()));
-}
-
-TEST(StatsTest, StatsTestDuration) {
-  struct TestStats {
-    // NOLINTNEXTLINE(modernize-pass-by-value)
-    CAFFE_STAT_CTOR(TestStats);
-    CAFFE_STAT(count);
-    CAFFE_AVG_EXPORTED_STAT(time_ns);
-  };
-  TestStats stats("stats");
-  CAFFE_DURATION(stats, time_ns) {
-    std::this_thread::sleep_for(std::chrono::microseconds(1));
-  }
-
-  ExportedStatList data;
-  StatRegistry::get().publish(data);
-  auto map = toMap(data);
-  auto countIt = map.find("stats/time_ns/count");
-  auto sumIt = map.find("stats/time_ns/sum");
-  EXPECT_TRUE(countIt != map.end() && sumIt != map.end());
-  EXPECT_EQ(countIt->second, 1);
-  EXPECT_GT(sumIt->second, 0);
-}
-
-TEST(StatsTest, StatsTestSimple) {
-  struct TestStats {
-    // NOLINTNEXTLINE(modernize-pass-by-value)
-    CAFFE_STAT_CTOR(TestStats);
-    CAFFE_STAT(s1);
-    CAFFE_STAT(s2);
-    CAFFE_EXPORTED_STAT(s3);
-  };
-  TestStats i1("i1");
-  TestStats i2("i2");
-  CAFFE_EVENT(i1, s1);
-  CAFFE_EVENT(i1, s2);
-  CAFFE_EVENT(i1, s3, 1);
-  CAFFE_EVENT(i1, s3, -1);
-  CAFFE_EVENT(i2, s3, 2);
-
-  ExportedStatList data;
-  StatRegistry::get().publish(data);
-  EXPECT_SUBSET(toMap(data), ExportedStatMap({{"i1/s3", 0}, {"i2/s3", 2}}));
-
-  StatRegistry reg2;
-  reg2.update(data);
-  reg2.update(data);
-
-  EXPECT_SUBSET(
-      toMap(reg2.publish(true)), ExportedStatMap({{"i1/s3", 0}, {"i2/s3", 4}}));
-  EXPECT_SUBSET(
-      toMap(reg2.publish()), ExportedStatMap({{"i1/s3", 0}, {"i2/s3", 0}}));
-}
-
-TEST(StatsTest, StatsTestStatic) {
-  struct TestStats {
-    // NOLINTNEXTLINE(modernize-pass-by-value)
-    CAFFE_STAT_CTOR(TestStats);
-    CAFFE_STATIC_STAT(cpuUsage);
-    CAFFE_STATIC_STAT(memUsage);
-  };
-  TestStats i1("i1");
-  TestStats i2("i2");
-  CAFFE_EVENT(i1, cpuUsage, 95);
-  CAFFE_EVENT(i2, memUsage, 80);
-
-  ExportedStatList data;
-  StatRegistry::get().publish(data);
-  EXPECT_SUBSET(
-      toMap(data), ExportedStatMap({{"i1/cpuUsage", 95}, {"i2/memUsage", 80}}));
-
-  CAFFE_EVENT(i1, cpuUsage, 80);
-  CAFFE_EVENT(i1, memUsage, 50);
-  CAFFE_EVENT(i2, memUsage, 90);
-
-  StatRegistry::get().publish(data);
-  EXPECT_SUBSET(
-      toMap(data),
-      ExportedStatMap(
-          {{"i1/cpuUsage", 80}, {"i1/memUsage", 50}, {"i2/memUsage", 90}}));
-}
-} // namespace
-} // namespace caffe2
--- a/caffe2/core/timer_test.cc
+++ b/caffe2/core/timer_test.cc
@ -1,65 +0,0 @@
-#include <chrono>
-#include <iostream>
-#include <thread>
-
-#include "caffe2/core/timer.h"
-#include <gtest/gtest.h>
-
-namespace caffe2 {
-namespace {
-
-TEST(TimerTest, Test) {
-  Timer timer;
-
-  // A timer auto-starts when it is constructed.
-  std::this_thread::sleep_for(std::chrono::microseconds(1));
-  EXPECT_GT(timer.NanoSeconds(), 0);
-
-  // Sleep for a while, and get the time.
-  timer.Start();
-  std::this_thread::sleep_for(std::chrono::milliseconds(100));
-  float ns = timer.NanoSeconds();
-  float us = timer.MicroSeconds();
-  float ms = timer.MilliSeconds();
-
-  // Time should be at least accurate +- 10%. (30% on Windows)
-#ifndef _WIN32
-  EXPECT_NEAR(ns, 100000000, 10000000);
-  EXPECT_NEAR(us, 100000, 10000);
-  EXPECT_NEAR(ms, 100, 10);
-#else
-  EXPECT_NEAR(ns, 100000000, 30000000);
-  EXPECT_NEAR(us, 100000, 30000);
-  EXPECT_NEAR(ms, 100, 30);
-#endif
-
-  // Test restarting the clock.
-  timer.Start();
-  EXPECT_LT(timer.MicroSeconds(), 1000);
-}
-
-TEST(TimerTest, TestLatency) {
-  constexpr int iter = 1000;
-  float latency = 0;
-  Timer timer;
-  for (int i = 0; i < iter; ++i) {
-    timer.Start();
-    latency += timer.NanoSeconds();
-  }
-  std::cout << "Average nanosecond latency is: " << latency / iter << std::endl;
-  latency = 0;
-  for (int i = 0; i < iter; ++i) {
-    timer.Start();
-    latency += timer.MicroSeconds();
-  }
-  std::cout << "Average microsecond latency is: " << latency / iter << std::endl;
-  latency = 0;
-  for (int i = 0; i < iter; ++i) {
-    timer.Start();
-    latency += timer.MilliSeconds();
-  }
-  std::cout << "Average millisecond latency is: " << latency / iter << std::endl;
-}
-
-}  // namespace
-}  // namespace caffe2
--- a/caffe2/core/transform_test.cc
+++ b/caffe2/core/transform_test.cc
@ -1,460 +0,0 @@
-#include <gtest/gtest.h>
-#include "caffe2/core/net.h"
-#include "caffe2/core/operator.h"
-#include "caffe2/core/transform.h"
-
-namespace caffe2 {
-
-namespace {
-
-using transform::Graph;
-
-static std::atomic<int> counter;
-
-class TransformDummyOp final : public OperatorBase {
- public:
-  using OperatorBase::OperatorBase;
-  bool Run(int /* unused */) override {
-    counter.fetch_add(1);
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(TransformDummyOp1, TransformDummyOp);
-
-OPERATOR_SCHEMA(TransformDummyOp1)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-REGISTER_CPU_OPERATOR(TransformDummyOp2, TransformDummyOp);
-
-OPERATOR_SCHEMA(TransformDummyOp2)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-REGISTER_CPU_OPERATOR(TransformDummyOp3, TransformDummyOp);
-
-OPERATOR_SCHEMA(TransformDummyOp3)
-    .NumInputs(0, INT_MAX)
-    .NumOutputs(0, INT_MAX)
-    .AllowInplace({{0, 0}, {1, 1}});
-
-/**
- * This TransformDummy transform will find all subgraphs of shape
- * (TransformDummyOp1 -> TransformDummyOp2) and replaces them with
- * (TransformDummyOp3). Simple unit test.
- */
-class DummyTransform : public Transform {
- public:
-  // Finds all patterns of the form (TransformDummyOp1 -> TransformDummyOp2)
-  bool PatternRule(const Graph& g, const std::vector<int>& subgraph, int idx)
-      override {
-    if (subgraph.size() >= pattern_chain.size()) {
-      return false;
-    }
-    // which index are we trying to append the new node to?
-    auto pattern_idx = subgraph.size();
-    // type doesn't match
-    if (g.node(idx).op.type() != pattern_chain[pattern_idx]) {
-      return false;
-    }
-    // not that head, and doesn't have exactly 1 parent
-    if (pattern_idx > 0 && g.node(idx).parents.size() != 1) {
-      return false;
-    }
-    // not that tail, and doesn't have exactly 1 child
-    if (pattern_idx < pattern_chain.size() - 1 &&
-        g.node(idx).children.size() != 1) {
-      return false;
-    }
-
-    return true;
-  }
-
-  // Checks if the subgraph matched is (TransformDummyOp1 -> TransformDummyOp2)
-  bool ValidatorRule(const Graph& g, const std::vector<int>& subgraph)
-      override {
-    if (subgraph.size() == 2) {
-      if (g.node(subgraph[0]).op.type() == "TransformDummyOp1" &&
-          g.node(subgraph[1]).op.type() == "TransformDummyOp2") {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  // Replaces a match of (TransformDummyOp1 -> TransformDummyOp2) with
-  // (TransformDummyOp3)
-  bool ReplaceRule(const std::vector<int>& match, Graph* g_ptr) override {
-    CHECK(g_ptr);
-    auto& g = *g_ptr;
-    OperatorDef new_op;
-    new_op.set_type("TransformDummyOp3");
-    int new_idx = g.size();
-
-    std::map<int, std::vector<string>> new_op_children;
-    std::map<int, std::vector<string>> new_op_parents;
-
-    // for each node parent in the head of the match, connect it to our new node
-    for (const auto& edge : g.node(match[0]).parents) {
-      int parent = edge.first;
-      for (const auto& blob : edge.second) {
-        g.node(parent).children[new_idx].push_back(blob);
-        new_op_parents[parent].push_back(blob);
-      }
-    }
-    for (const string& blob : g.node(match[0]).op.input()) {
-      new_op.add_input(blob);
-    }
-
-    // for each child in the tail of the match, connect it to our new node
-    for (const auto& edge : g.node(match[1]).children) {
-      int child = edge.first;
-      for (const auto& blob : edge.second) {
-        g.node(child).parents[new_idx].push_back(blob);
-        new_op_children[child].push_back(blob);
-      }
-    }
-    for (const string& blob : g.node(match[1]).op.output()) {
-      new_op.add_output(blob);
-    }
-
-    g.DeactivateSubgraph(match);
-
-    g.push_node(transform::Node(new_op, true, new_op_parents, new_op_children));
-    return true;
-  }
-
- private:
-  const std::vector<string> pattern_chain = {"TransformDummyOp1",
-                                             "TransformDummyOp2"};
-};
-
-REGISTER_TRANSFORM(TransformDummySwap, DummyTransform)
-
-TEST(TransformTest, TestPatternMatch) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "TransformDummyOp1", {"in"}, {"mid1"});
-  AddOp(&netdef, "TransformDummyOp2", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "TransformDummyOp1", {"mid2"}, {"mid3"});
-  AddOp(&netdef, "TransformDummyOp2", {"mid3"}, {"out"});
-
-  auto t = CreateTransform("TransformDummySwap");
-  Graph g(netdef);
-  auto matches = t->PatternMatch(g);
-
-  EXPECT_EQ(matches.size(), 2);
-  EXPECT_EQ(matches[0][0], 0);
-  EXPECT_EQ(matches[0][1], 1);
-  EXPECT_EQ(matches[1][0], 2);
-  EXPECT_EQ(matches[1][1], 3);
-}
-
-TEST(TransformTest, TestReplacePattern) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "TransformDummyOp1", {"in"}, {"mid1"});
-  AddOp(&netdef, "TransformDummyOp2", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "TransformDummyOp1", {"mid2"}, {"mid3"});
-  AddOp(&netdef, "TransformDummyOp2", {"mid3"}, {"out"});
-
-  auto t = CreateTransform("TransformDummySwap");
-  Graph g(netdef);
-  std::vector<std::vector<int>> matches = {{0, 1}, {2, 3}};
-  t->ReplacePattern(matches, &g);
-
-  EXPECT_EQ(g.size(), 6);
-  EXPECT_FALSE(g.is_node_active(0));
-  EXPECT_FALSE(g.is_node_active(1));
-  EXPECT_FALSE(g.is_node_active(2));
-  EXPECT_FALSE(g.is_node_active(3));
-  EXPECT_TRUE(g.is_node_active(4));
-  EXPECT_TRUE(g.is_node_active(5));
-
-  EXPECT_EQ(g.node(4).children.size(), 1);
-  EXPECT_EQ(g.node(4).parents.size(), 0);
-  EXPECT_TRUE(g.node(4).children.count(5));
-
-  NetDef replaced_netdef = g.GetNetDef();
-
-  EXPECT_EQ(replaced_netdef.op().size(), 2);
-  EXPECT_EQ(replaced_netdef.op(0).type(), "TransformDummyOp3");
-  EXPECT_EQ(replaced_netdef.op(0).input(0), "in");
-  EXPECT_EQ(replaced_netdef.op(1).type(), "TransformDummyOp3");
-  EXPECT_EQ(replaced_netdef.op(1).output(0), "out");
-}
-
-TEST(TransformTest, TestTransformApply) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-  AddOp(&netdef, "TransformDummyOp1", {"in"}, {"mid1"});
-  AddOp(&netdef, "TransformDummyOp2", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "TransformDummyOp1", {"mid2"}, {"mid3"});
-  AddOp(&netdef, "TransformDummyOp2", {"mid3"}, {"out"});
-
-  NetDef replaced_netdef = ApplyTransform("TransformDummySwap", netdef);
-
-  EXPECT_EQ(replaced_netdef.op().size(), 2);
-  EXPECT_EQ(replaced_netdef.op(0).type(), "TransformDummyOp3");
-  EXPECT_EQ(replaced_netdef.op(0).input(0), "in");
-  EXPECT_EQ(replaced_netdef.op(1).type(), "TransformDummyOp3");
-  EXPECT_EQ(replaced_netdef.op(1).output(0), "out");
-}
-
-/**
- * Transform with Sorted Order matching.
- * Matches two operators of type TransformDummyOp1, even if disconnected.
- * These operators will be given in execution order,
- * but doesn't need connectivity.
- * Changes them to TransformDummyOp2.
- */
-class SortedDummyTransform : public Transform {
- public:
-  SortedDummyTransform() {
-    SetPatternMatchType(SORTED_WRT_EXECUTION_ORDER);
-  }
-  bool PatternRule(const Graph& g, const std::vector<int>& subgraph, int idx)
-      override {
-    if (g.node(idx).op.type() != "TransformDummyOp1") {
-      return false;
-    }
-    return true;
-  }
-  bool ValidatorRule(const Graph& g, const std::vector<int>& subgraph)
-      override {
-    if (subgraph.size() == 2) {
-      if (g.node(subgraph[0]).op.type() == "TransformDummyOp1" &&
-          g.node(subgraph[1]).op.type() == "TransformDummyOp1") {
-        return true;
-      }
-    }
-    return false;
-  }
-  bool ReplaceRule(const std::vector<int>& match, Graph* g_ptr) override {
-    CHECK(g_ptr);
-    for (const auto& x : match) {
-      g_ptr->node(x).op.set_type("TransformDummyOp2");
-    }
-    return true;
-  }
-};
-
-REGISTER_TRANSFORM(SortedTransformDummySwap, SortedDummyTransform)
-
-TEST(TransformTest, TestPatternMatchTypeSortedOrder) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "TransformDummyOp1", {"in"}, {"mid1"});
-  AddOp(&netdef, "TransformDummyOp3", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "TransformDummyOp1", {"mid2"}, {"mid3"});
-  AddOp(&netdef, "TransformDummyOp3", {"mid3"}, {"out"});
-
-  auto t = CreateTransform("SortedTransformDummySwap");
-  NetDef replaced_netdef = t->ApplyTo(netdef);
-
-  EXPECT_EQ(replaced_netdef.op().size(), 4);
-  EXPECT_EQ(replaced_netdef.op(0).type(), "TransformDummyOp2");
-  EXPECT_EQ(replaced_netdef.op(2).type(), "TransformDummyOp2");
-}
-
-/**
- * General subgraph transform.
- * Matches a TransformDummyOp1, and a TransformDummyOp2.
- * Order doesn't matter. Connectedness doesn't matter.
- * Turns them into TransformDummyOp3.
- */
-class GeneralDummyTransform : public Transform {
- public:
-  GeneralDummyTransform() {
-    SetPatternMatchType(GENERAL);
-  }
-  bool PatternRule(const Graph& g, const std::vector<int>& subgraph, int idx)
-      override {
-    if (subgraph.size() == 0 && g.node(idx).op.type() == "TransformDummyOp1") {
-      return true;
-    }
-    if (subgraph.size() == 1 && g.node(idx).op.type() == "TransformDummyOp2") {
-      return true;
-    }
-    return false;
-  }
-  bool ValidatorRule(const Graph& g, const std::vector<int>& subgraph)
-      override {
-    if (subgraph.size() == 2) {
-      if (g.node(subgraph[0]).op.type() == "TransformDummyOp1" &&
-          g.node(subgraph[1]).op.type() == "TransformDummyOp2") {
-        return true;
-      }
-    }
-    return false;
-  }
-  bool ReplaceRule(const std::vector<int>& match, Graph* g_ptr) override {
-    CHECK(g_ptr);
-    for (const auto& x : match) {
-      g_ptr->node(x).op.set_type("TransformDummyOp3");
-    }
-    return true;
-  }
-};
-
-REGISTER_TRANSFORM(GeneralTransformDummySwap, GeneralDummyTransform)
-
-TEST(TransformTest, TestPatternMatchTypeGeneral) {
-  Workspace ws;
-  ws.CreateBlob("in");
-  NetDef netdef;
-
-  AddOp(&netdef, "TransformDummyOp2", {"in"}, {"mid1"});
-  AddOp(&netdef, "TransformDummyOp3", {"mid1"}, {"mid2"});
-  AddOp(&netdef, "TransformDummyOp1", {"mid2"}, {"mid3"});
-  AddOp(&netdef, "TransformDummyOp3", {"mid3"}, {"out"});
-
-  auto t = CreateTransform("GeneralTransformDummySwap");
-  NetDef replaced_netdef = t->ApplyTo(netdef);
-
-  EXPECT_EQ(replaced_netdef.op().size(), 4);
-  EXPECT_EQ(replaced_netdef.op(0).type(), "TransformDummyOp3");
-  EXPECT_EQ(replaced_netdef.op(2).type(), "TransformDummyOp3");
-}
-
-class TransformSleepFastOp final : public OperatorBase { // Test op: Run() only sleeps for 30 ms.
- public:
-  using OperatorBase::OperatorBase; // inherit OperatorBase's constructors
-  bool Run(int /* unused */) override {
-    std::this_thread::sleep_for(std::chrono::milliseconds(30)); // the "fast" duration
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(TransformSleepFastOp, TransformSleepFastOp);
-
-OPERATOR_SCHEMA(TransformSleepFastOp)
-    .NumInputs(0, INT_MAX) // between 0 and INT_MAX inputs
-    .NumOutputs(0, INT_MAX) // between 0 and INT_MAX outputs
-    .AllowInplace({{0, 0}, {1, 1}}); // NOTE(review): presumably {input, output} index pairs — confirm
-
-class TransformSleepSlowOp final : public OperatorBase { // Test op: Run() only sleeps for 100 ms.
- public:
-  using OperatorBase::OperatorBase; // inherit OperatorBase's constructors
-  bool Run(int /* unused */) override {
-    std::this_thread::sleep_for(std::chrono::milliseconds(100)); // the "slow" duration
-    return true;
-  }
-};
-
-REGISTER_CPU_OPERATOR(TransformSleepSlowOp, TransformSleepSlowOp);
-
-OPERATOR_SCHEMA(TransformSleepSlowOp)
-    .NumInputs(0, INT_MAX) // between 0 and INT_MAX inputs
-    .NumOutputs(0, INT_MAX) // between 0 and INT_MAX outputs
-    .AllowInplace({{0, 0}, {1, 1}}); // NOTE(review): presumably {input, output} index pairs — confirm
-
-/**
- * This TransformDummy transform will find all operators of type old_type,
- * and replace them with type new_type. Each match is exactly one node.
- */
-class TypeSwapTransform : public Transform {
- public:
-  // The concrete old/new type strings are supplied by the constructing code.
-  // NOLINTNEXTLINE(modernize-pass-by-value)
-  explicit TypeSwapTransform(string old_type, string new_type)
-      : old_type(old_type), new_type(new_type) {}
-
-  // Accept a node only if nothing is matched so far and its type is old_type.
-  bool PatternRule(const Graph& g, const std::vector<int>& subgraph, int idx)
-      override {
-    if (subgraph.size() == 0 && g.node(idx).op.type() == old_type) {
-      return true;
-    }
-    return false;
-  }
-  // Checks that the matched subgraph is a single node of type old_type.
-  bool ValidatorRule(const Graph& g, const std::vector<int>& subgraph)
-      override {
-    if (subgraph.size() == 1) {
-      if (g.node(subgraph[0]).op.type() == old_type) {
-        return true;
-      }
-    }
-    return false;
-  }
-  // Sets the type of the first matched node to new_type; always returns true.
-  bool ReplaceRule(const std::vector<int>& match, Graph* g_ptr) override {
-    CHECK(g_ptr);
-    auto& g = *g_ptr;
-    g.node(match[0]).op.set_type(new_type); // match[0] is accessed without a size check
-    return true;
-  }
-
- private:
-  string old_type; // operator type to look for
-  string new_type; // operator type written by ReplaceRule
-};
-
-class FastToSlowTransform : public TypeSwapTransform { // retypes TransformSleepFastOp -> TransformSleepSlowOp
- public:
-  explicit FastToSlowTransform()
-      : TypeSwapTransform("TransformSleepFastOp", "TransformSleepSlowOp") {}
-};
-
-REGISTER_TRANSFORM(FastToSlow, FastToSlowTransform); // registered as "FastToSlow"
-
-class SlowToFastTransform : public TypeSwapTransform { // retypes TransformSleepSlowOp -> TransformSleepFastOp
- public:
-  explicit SlowToFastTransform()
-      : TypeSwapTransform("TransformSleepSlowOp", "TransformSleepFastOp") {}
-};
-
-REGISTER_TRANSFORM(SlowToFast, SlowToFastTransform); // registered as "SlowToFast"
-
-TEST(TransformTest, TestApplyTransformIfFasterIsFaster) { // slow (100 ms) op -> fast (30 ms) op is kept
-  NetDef init_netdef;
-  AddOp(&init_netdef, "ConstantFill", {}, {"in"}); // init net has one op whose output is "in"
-
-  NetDef netdef;
-  AddOp(&netdef, "TransformDummyOp1", {"in"}, {"mid"});
-  AddOp(&netdef, "TransformSleepSlowOp", {"mid"}, {"out"});
-  netdef.add_external_input("in"); // This is important for this function.
-
-  // Make sure the transform would work normally.
-  auto transformed_net = ApplyTransform("SlowToFast", netdef);
-  EXPECT_EQ(transformed_net.op(1).type(), "TransformSleepFastOp");
-
-  // The transform should still be applied: the test expects the fast op.
-  auto mystery_net =
-      ApplyTransformIfFaster("SlowToFast", netdef, init_netdef, 5, 10, 1.01); // NOTE(review): 5, 10, 1.01 presumably warmup runs, main runs, required speedup — confirm
-  EXPECT_EQ(mystery_net.op(1).type(), "TransformSleepFastOp");
-}
-
-TEST(TransformTest, TestApplyTransformIfFasterButSlower) { // fast (30 ms) op -> slow (100 ms) op is rejected
-  NetDef init_netdef;
-  AddOp(&init_netdef, "ConstantFill", {}, {"in"}); // init net has one op whose output is "in"
-
-  NetDef netdef;
-  AddOp(&netdef, "TransformDummyOp1", {"in"}, {"mid"});
-  AddOp(&netdef, "TransformSleepFastOp", {"mid"}, {"out"});
-  netdef.add_external_input("in"); // This is important for this function.
-
-  // Make sure the transform would work normally.
-  auto transformed_net = ApplyTransform("FastToSlow", netdef);
-  EXPECT_EQ(transformed_net.op(1).type(), "TransformSleepSlowOp");
-
-  // Should not actually change: the test expects the original fast op back.
-  auto mystery_net =
-      ApplyTransformIfFaster("FastToSlow", netdef, init_netdef, 5, 10, 1.01); // NOTE(review): 5, 10, 1.01 presumably warmup runs, main runs, required speedup — confirm
-  EXPECT_EQ(mystery_net.op(1).type(), "TransformSleepFastOp");
-}
-
-} // namespace
-
-} // namespace caffe2
--- a/caffe2/core/workspace_test.cc
+++ b/caffe2/core/workspace_test.cc
@ -1,149 +0,0 @@
-#include <iostream>
-
-#include "caffe2/core/operator.h"
-#include <gtest/gtest.h>
-
-namespace caffe2 {
-
-class WorkspaceTestFoo {}; // empty type used as the "wrong type" in the IsType checks
-
-CAFFE_KNOWN_TYPE(WorkspaceTestFoo); // NOTE(review): presumably registers the type with caffe2's type system — confirm
-
-TEST(WorkspaceTest, BlobAccess) { // create / look up / re-create / remove blobs in one workspace
-  Workspace ws;
-
-  EXPECT_FALSE(ws.HasBlob("nonexisting"));
-  EXPECT_EQ(ws.GetBlob("nonexisting"), nullptr);
-
-  EXPECT_EQ(ws.GetBlob("newblob"), nullptr); // GetBlob alone does not create the blob
-  EXPECT_NE(nullptr, ws.CreateBlob("newblob"));
-  EXPECT_NE(nullptr, ws.GetBlob("newblob"));
-  EXPECT_TRUE(ws.HasBlob("newblob"));
-
-  // Blobs with other names should still not exist.
-  EXPECT_FALSE(ws.HasBlob("nonexisting"));
-  EXPECT_EQ(ws.GetBlob("nonexisting"), nullptr);
-
-  // Check if the returned Blob is OK for all operations
-  Blob* blob = ws.GetBlob("newblob");
-  int* int_unused CAFFE2_UNUSED = blob->GetMutable<int>(); // pointer is unused; afterwards the blob reports type int
-  EXPECT_TRUE(blob->IsType<int>());
-  EXPECT_FALSE(blob->IsType<WorkspaceTestFoo>());
-  EXPECT_NE(&blob->Get<int>(), nullptr);
-
-  // Re-creating the blob does not change the content as long as it already
-  // exists.
-  EXPECT_NE(nullptr, ws.CreateBlob("newblob"));
-  EXPECT_TRUE(blob->IsType<int>());
-  EXPECT_FALSE(blob->IsType<WorkspaceTestFoo>());
-  // When not null, we should only call with the right type.
-  EXPECT_NE(&blob->Get<int>(), nullptr);
-
-  // Re-creating the blob through CreateLocalBlob does not change the content
-  // either.
-  EXPECT_NE(nullptr, ws.CreateLocalBlob("newblob"));
-  EXPECT_TRUE(blob->IsType<int>());
-  EXPECT_NE(&blob->Get<int>(), nullptr);
-
-  // Removing a blob fails for a missing name and succeeds for an existing one.
-  EXPECT_FALSE(ws.HasBlob("nonexisting"));
-  EXPECT_FALSE(ws.RemoveBlob("nonexisting"));
-  EXPECT_TRUE(ws.HasBlob("newblob"));
-  EXPECT_TRUE(ws.RemoveBlob("newblob"));
-  EXPECT_FALSE(ws.HasBlob("newblob"));
-}
-
-TEST(WorkspaceTest, RunEmptyPlan) { // a default-constructed PlanDef is expected to run successfully
-  PlanDef plan_def;
-  Workspace ws;
-  EXPECT_TRUE(ws.RunPlan(plan_def));
-}
-
-TEST(WorkspaceTest, Sharing) { // blob visibility between a parent workspace and its child
-  Workspace parent;
-  EXPECT_FALSE(parent.HasBlob("a"));
-  EXPECT_TRUE(parent.CreateBlob("a"));
-  EXPECT_TRUE(parent.GetBlob("a"));
-  {
-    Workspace child(&parent);
-    // Child can access parent blobs
-    EXPECT_TRUE(child.HasBlob("a"));
-    EXPECT_TRUE(child.GetBlob("a"));
-    // Child can create local blobs
-    EXPECT_FALSE(child.HasBlob("b"));
-    EXPECT_FALSE(child.GetBlob("b"));
-    EXPECT_TRUE(child.CreateBlob("b"));
-    EXPECT_TRUE(child.GetBlob("b"));
-    // Parent cannot access child blobs
-    EXPECT_FALSE(parent.GetBlob("b"));
-    EXPECT_FALSE(parent.HasBlob("b"));
-    // Parent can create duplicate names
-    EXPECT_TRUE(parent.CreateBlob("b"));
-    // But child has local overrides
-    EXPECT_NE(child.GetBlob("b"), parent.GetBlob("b"));
-    // CreateBlob in the child for a name the parent owns yields the parent's blob
-    EXPECT_TRUE(child.CreateBlob("a"));
-    EXPECT_EQ(child.GetBlob("a"), parent.GetBlob("a"));
-    // Child can create a local blob for a blob that already exists in the parent
-    EXPECT_TRUE(child.CreateLocalBlob("a"));
-    // But the local blob will be different from the one in parent workspace
-    EXPECT_NE(child.GetBlob("a"), parent.GetBlob("a"));
-  }
-}
-
-TEST(WorkspaceTest, BlobMapping) { // blobs forwarded into a child workspace under a different name
-  Workspace parent;
-  EXPECT_FALSE(parent.HasBlob("a"));
-  EXPECT_TRUE(parent.CreateBlob("a"));
-  EXPECT_TRUE(parent.GetBlob("a"));
-  {
-    std::unordered_map<string, string> forwarded_blobs;
-    forwarded_blobs["inner_a"] = "a"; // key: name seen by the child; value: name in the parent
-    Workspace child(&parent, forwarded_blobs);
-    EXPECT_FALSE(child.HasBlob("a")); // the parent's own name is not visible in the child
-    EXPECT_TRUE(child.HasBlob("inner_a"));
-    EXPECT_TRUE(child.GetBlob("inner_a"));
-    Workspace ws; // a second, unrelated workspace to map blobs from
-    EXPECT_TRUE(ws.CreateBlob("b"));
-    forwarded_blobs.clear();
-    forwarded_blobs["inner_b"] = "b";
-    child.AddBlobMapping(&ws, forwarded_blobs); // add a mapping after the child was constructed
-    EXPECT_FALSE(child.HasBlob("b"));
-    EXPECT_TRUE(child.HasBlob("inner_b"));
-    EXPECT_TRUE(child.GetBlob("inner_b"));
-  }
-}
-
-/**
- * Checks that Workspace::ForEach(f) applies f to exactly the specified set of
- * workspaces, in any order, and to each workspace only once.
- */
-static void forEachCheck(std::initializer_list<Workspace*> workspaces) {
-  std::unordered_set<Workspace*> expected(workspaces);
-  std::unordered_set<Workspace*> actual; // workspaces seen by the callback
-  Workspace::ForEach([&](Workspace* ws) {
-    auto inserted = actual.insert(ws).second; // false if ws was already visited
-    EXPECT_TRUE(inserted);
-  });
-  EXPECT_EQ(actual, expected); // set comparison, so visiting order is irrelevant
-}
-
-TEST(WorkspaceTest, ForEach) { // ForEach must track workspaces as they enter and leave scope
-  forEachCheck({}); // no workspace constructed yet in this test
-
-  {
-    Workspace ws1;
-    forEachCheck({&ws1});
-
-    {
-      Workspace ws2;
-      forEachCheck({&ws1, &ws2});
-    }
-
-    forEachCheck({&ws1}); // ws2 has gone out of scope
-  }
-
-  forEachCheck({}); // ws1 has gone out of scope as well
-}
-
-}  // namespace caffe2
--- a/caffe2/release-notes.md
+++ b/caffe2/release-notes.md
@ -1,175 +0,0 @@
-# Caffe2 v0.7.0 Release Notes
-
-## Installation
-
-This build is confirmed for:
-
-* Ubuntu 14.04
-* Ubuntu 16.04
-
-### Required Dependencies
-
-```bash
-sudo apt-get update
-sudo apt-get install -y --no-install-recommends \
-      build-essential \
-      cmake \
-      git \
-      libgoogle-glog-dev \
-      libprotobuf-dev \
-      protobuf-compiler \
-      python-dev \
-      python-pip
-sudo pip install numpy protobuf
-```
-
-### Optional GPU Support
-
-If you plan to use GPU instead of CPU only, then you should install NVIDIA CUDA and cuDNN, a GPU-accelerated library of primitives for deep neural networks.
-Follow [NVIDIA's detailed instructions](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu-installation) or, if you're feeling lucky, try the quick-install commands below.
-
-**Update your graphics card drivers first!** Otherwise you may suffer from a wide range of difficult to diagnose errors.
-
-**For Ubuntu 14.04**
-
-```bash
-sudo apt-get update && sudo apt-get install wget -y --no-install-recommends
-wget "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_8.0.61-1_amd64.deb"
-sudo dpkg -i cuda-repo-ubuntu1404_8.0.61-1_amd64.deb
-sudo apt-get update
-sudo apt-get install cuda
-```
-
-**For Ubuntu 16.04**
-
-```bash
-sudo apt-get update && sudo apt-get install wget -y --no-install-recommends
-wget "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb"
-sudo dpkg -i cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
-sudo apt-get update
-sudo apt-get install cuda
-```
-
-#### Install cuDNN (all Ubuntu versions)
-
-```
-CUDNN_URL="http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-linux-x64-v5.1.tgz"
-wget ${CUDNN_URL}
-sudo tar -xzf cudnn-8.0-linux-x64-v5.1.tgz -C /usr/local
-rm cudnn-8.0-linux-x64-v5.1.tgz && sudo ldconfig
-```
-
-### Optional Dependencies
-
-> Note `libgflags2` is for Ubuntu 14.04. `libgflags-dev` is for Ubuntu 16.04.
-
-```bash
-# for Ubuntu 14.04
-sudo apt-get install -y --no-install-recommends libgflags2
-```
-
-```bash
-# for Ubuntu 16.04
-sudo apt-get install -y --no-install-recommends libgflags-dev
-```
-
-```bash
-# for both Ubuntu 14.04 and 16.04
-sudo apt-get install -y --no-install-recommends \
-      libgtest-dev \
-      libiomp-dev \
-      libleveldb-dev \
-      liblmdb-dev \
-      libopencv-dev \
-      libopenmpi-dev \
-      libsnappy-dev \
-      openmpi-bin \
-      openmpi-doc \
-      python-pydot
-sudo pip install \
-      flask \
-      graphviz \
-      hypothesis \
-      jupyter \
-      matplotlib \
-      pydot python-nvd3 \
-      pyyaml \
-      requests \
-      scikit-image \
-      scipy \
-      setuptools \
-      tornado
-```
-
-### Clone & Build
-
-```bash
-git clone --recursive https://github.com/caffe2/caffe2.git && cd caffe2
-make && cd build && sudo make install
-python -c 'from caffe2.python import core' 2>/dev/null && echo "Success" || echo "Failure"
-```
-
-Run this command below to test if your GPU build was a success. You will get a test output either way, but it will warn you at the top of the output if CPU was used instead along with other errors like missing libraries.
-
-```bash
-python -m caffe2.python.operator_test.relu_op_test
-```
-
-### Environment Variables
-
-These environment variables may assist you depending on your current configuration. When using the install instructions above on the AWS Deep Learning AMI you don't need to set these variables. However, our Docker scripts built on Ubuntu-14.04 or NVIDIA's CUDA images seem to benefit from having these set. If you ran into problems with the build tests above then these are good things to check. Echo them first and see what you have and possibly append or replace with these directories. Also visit the troubleshooting section below.
-
-```bash
-echo $PYTHONPATH
-# export PYTHONPATH=/usr/local:$PYTHONPATH
-# export PYTHONPATH=$PYTHONPATH:/home/ubuntu/caffe2/build
-echo $LD_LIBRARY_PATH
-# export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
-```
-
-### Setting Up Tutorials & Jupyter Server
-
-If you're running this all on a cloud computer, you probably won't have a UI or way to view the IPython notebooks by default. Typically, you would launch them locally with `ipython notebook` and you would see a localhost:8888 webpage pop up with the directory of notebooks running. The following example will show you how to launch the Jupyter server and connect to it remotely via an SSH tunnel.
-
-First configure your cloud server to accept port 8889, or whatever you want, but change the port in the following commands. On AWS you accomplish this by adding a rule to your server's security group allowing a TCP inbound on port 8889. Otherwise you would adjust iptables for this.
-
-Next you launch the Jupyter server.
-
-```
-jupyter notebook --no-browser --port=8889
-```
-
-Then create the SSH tunnel. This will pass the cloud server's Jupyter instance to your localhost 8888 port for you to use locally. The example below is modeled on how you would connect to AWS, where `your-public-cert.pem` is your own public certificate and `ubuntu@super-rad-GPU-instance.compute-1.amazonaws.com` is your login to your cloud server. You can easily grab this on AWS by going to Instances > Connect, copying the part after `ssh`, and swapping it into the command below.
-
-```
-ssh -N -f -L localhost:8888:localhost:8889 -i "your-public-cert.pem" ubuntu@super-rad-GPU-instance.compute-1.amazonaws.com
-```
-
-### Troubleshooting
-
-|Python errors||
-|----|-----|
-|Python version | [Python](https://www.python.org/) is required to run Caffe2. We currently require [Python2.7](https://www.python.org/download/releases/2.7/). *Ubuntu 14.04 and greater have Python built in by default*, and that can be used to run Caffe2. To check your version: `python --version`|
-|Solution | If you want the developer version of python, you could install the `dev` package for Python: `sudo apt-get install python-dev`|
-|Python environment | You may have another version of Python installed or need to support Python version 3 for other projects.|
-|Solution | Try virtualenv or Anaconda. The [Anaconda](https://www.continuum.io/downloads) platform provides a single script to install many of the necessary packages for Caffe2, including Python. Using Anaconda is outside the scope of these instructions, but if you are interested, it may work well for you.|
-|pip version | If you plan to use Python with Caffe2 then you need pip.|
-|Solution | `sudo apt-get install python-pip` and also try using pip2 instead of pip.|
-|"AttributeError: 'module' object has no attribute 'MakeArgument'" | Occurs when calling `core.CreateOperator`|
-|Solution | Check your install directory (`/usr/local/`), and remove the folder `/caffe2/python/utils`|
-
-|Building from source||
-|----|-----|
-|OS version | Caffe2 requires Ubuntu 14.04 or greater.|
-|git | While you can download the Caffe2 source code and submodules directly from GitHub as a zip, using git makes it much easier.|
-|Solution | `sudo apt-get install git`|
-|protobuf | You may experience an error related to protobuf during the make step.|
-|Solution | Make sure you've installed protobuf in **both** of these two ways: `sudo apt-get install libprotobuf-dev protobuf-compiler && sudo pip install protobuf`|
-|libgflags2 error | This optional dependency is for Ubuntu 14.04.|
-|Solution | Use `apt-get install libgflags-dev` for Ubuntu 16.04.|
-
-|GPU Support||
-|----|-----|
-|GPU errors | Unsupported GPU or wrong version|
-|Solution | You need to know the specific `deb` for your version of Linux. `sudo dpkg -i cuda-repo-<distro>_<version>_<architecture>.deb` Refer to NVIDIA's [installation guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu-installation).|
-|Build issues | Be warned that installing CUDA and cuDNN will increase the size of your build by about 4GB, so plan to have at least 12GB for your Ubuntu disk size.|
--- a/caffe2/requirements.txt
+++ b/caffe2/requirements.txt
@ -1,4 +0,0 @@
-numpy
-enum34
-pyyaml
-requests