Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Fix caffe2 eigen + cuda9 windows build (#6746)
This commit is contained in:
parent 4e8e13d90c
commit 0d0dcde5a8
@@ -45,13 +45,13 @@ __global__ void ComputeArgCUDAKernel(
 
 } // namespace
 
-template <typename T>
-class ArgMaxOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ArgMaxCudaOp final : public ArgOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  ArgMaxOp(const OperatorDef& operator_def, Workspace* ws)
-      : ArgOpBase<T, CUDAContext>(operator_def, ws) {}
+  ArgMaxCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ArgOpBase<T, Context>(operator_def, ws) {}
 
  protected:
   bool Compute(
@@ -62,8 +62,8 @@ class ArgMaxOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
       TIndex* Y) override;
 };
 
-template <typename T>
-bool ArgMaxOp<T, CUDAContext>::Compute(
+template <typename T, typename Context>
+bool ArgMaxCudaOp<T, Context>::Compute(
     const T* X,
     const TIndex prev_size,
     const TIndex next_size,
@@ -85,13 +85,13 @@ bool ArgMaxOp<T, CUDAContext>::Compute(
   return true;
 }
 
-template <typename T>
-class ArgMinOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ArgMinCudaOp final : public ArgOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  ArgMinOp(const OperatorDef& operator_def, Workspace* ws)
-      : ArgOpBase<T, CUDAContext>(operator_def, ws) {}
+  ArgMinCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ArgOpBase<T, Context>(operator_def, ws) {}
 
  protected:
   bool Compute(
@@ -102,8 +102,8 @@ class ArgMinOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
       TIndex* Y) override;
 };
 
-template <typename T>
-bool ArgMinOp<T, CUDAContext>::Compute(
+template <typename T, typename Context>
+bool ArgMinCudaOp<T, Context>::Compute(
     const T* X,
     const TIndex prev_size,
     const TIndex next_size,
@@ -125,7 +125,7 @@ bool ArgMinOp<T, CUDAContext>::Compute(
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR(ArgMin, ArgMinOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxCudaOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(ArgMin, ArgMinCudaOp<float, CUDAContext>);
 
 } // namespace caffe2
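The pattern in this file, repeated for the reduce and top-k operators below, is to drop the partial specialization of the operator template on CUDAContext and introduce a separately named class template whose Context parameter is only bound to CUDAContext at registration time; presumably the MSVC + CUDA 9 toolchain rejected the specialized form. A minimal standalone sketch of the two shapes follows (OpBase, CUDAContext, and the explicit instantiation are simplified stand-ins, not the real caffe2 declarations):

// Hedged sketch of the refactoring pattern applied in this commit: replace a
// partial specialization on CUDAContext with an independently named class
// template. Everything here is a simplified stand-in for the caffe2 types.
struct CUDAContext {};

template <typename T, typename Context>
class OpBase {
 public:
  virtual ~OpBase() = default;
};

// Before (removed by the diff): a partial specialization such as
//   template <typename T>
//   class ArgMaxOp<T, CUDAContext> final : public OpBase<T, CUDAContext> {...};
// which the Windows CUDA 9 build apparently failed to compile.

// After: an ordinary class template; CUDA only appears when it is instantiated.
template <typename T, typename Context>
class ArgMaxCudaOp final : public OpBase<T, Context> {
 public:
  bool Compute() { return true; }  // the real operator launches a CUDA kernel
};

// Registration then names the CUDA instantiation explicitly, matching
// REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxCudaOp<float, CUDAContext>) above.
template class ArgMaxCudaOp<float, CUDAContext>;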
@@ -180,14 +180,14 @@ void ComputeReduceMinMaxGradientCUDA(
 
 } // namespace
 
-template <typename T>
-class ReduceMinMaxGradientOp<T, CUDAContext> final
-    : public ReduceGradientOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ReduceMinMaxGradientCudaOp final
+    : public ReduceGradientOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  ReduceMinMaxGradientOp(const OperatorDef& operator_def, Workspace* ws)
-      : ReduceGradientOpBase<T, CUDAContext>(operator_def, ws) {}
+  ReduceMinMaxGradientCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ReduceGradientOpBase<T, Context>(operator_def, ws) {}
 
  protected:
   bool Compute(
@@ -207,12 +207,12 @@ class ReduceMinMaxGradientOp<T, CUDAContext> final
 REGISTER_CUDA_OPERATOR(ReduceMin, ReduceMinOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ReduceMinGradient,
-    ReduceMinMaxGradientOp<float, CUDAContext>);
+    ReduceMinMaxGradientCudaOp<float, CUDAContext>);
 
 REGISTER_CUDA_OPERATOR(ReduceMax, ReduceMaxOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ReduceMaxGradient,
-    ReduceMinMaxGradientOp<float, CUDAContext>);
+    ReduceMinMaxGradientCudaOp<float, CUDAContext>);
 
 REGISTER_CUDA_OPERATOR(ReduceSum, ReduceSumOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
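Only the C++ class name changes in this hunk; the registered operator names (ReduceMinGradient, ReduceMaxGradient) stay the same, so serialized nets keep resolving to the same operators. A toy string-keyed registry, not caffe2's actual OperatorRegistry, just to illustrate why the rename is invisible to net definitions:

// Toy string -> factory registry, illustrating why renaming the C++ class
// (ReduceMinMaxGradientOp -> ReduceMinMaxGradientCudaOp) does not change the
// operator name that serialized nets refer to. Not caffe2's real registry.
#include <functional>
#include <map>
#include <memory>
#include <string>

struct OperatorBase {
  virtual ~OperatorBase() = default;
};

using Factory = std::function<std::unique_ptr<OperatorBase>()>;

std::map<std::string, Factory>& Registry() {
  static std::map<std::string, Factory> r;
  return r;
}

template <typename Op>
void RegisterCudaOperator(const std::string& name) {
  Registry()[name] = [] { return std::unique_ptr<OperatorBase>(new Op()); };
}

struct ReduceMinMaxGradientCudaOp : OperatorBase {};

int main() {
  // The lookup key is the string, so nets keep working after the rename.
  RegisterCudaOperator<ReduceMinMaxGradientCudaOp>("ReduceMinGradient");
  auto op = Registry()["ReduceMinGradient"]();
  return op != nullptr ? 0 : 1;
}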
@@ -139,19 +139,19 @@ __global__ void SetTopKGradientCUDAKernel(
 
 } // namespace
 
-template <typename T>
-class TopKOp<T, CUDAContext> : public Operator<CUDAContext> {
+template <typename T, typename Context>
+class TopKCudaOp : public Operator<Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  TopKOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CUDAContext>(operator_def, ws),
+  TopKCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
         OP_SINGLE_ARG(int, "k", k_, -1),
         OP_SINGLE_ARG(int, "axis", axis_, -1) {
     CAFFE_ENFORCE(k_ >= 1, "k argument must be >= 1");
   }
 
-  ~TopKOp(){};
+  ~TopKCudaOp(){};
 
   bool RunOnDevice() override;
 
@@ -160,22 +160,22 @@ class TopKOp<T, CUDAContext> : public Operator<CUDAContext> {
   int axis_;
 
   // Buffers for CUDAContext.
-  TensorCUDA input_transposed_buffer_;
-  TensorCUDA values_transposed_buffer_;
-  TensorCUDA indices_transposed_buffer_;
+  Tensor<Context> input_transposed_buffer_;
+  Tensor<Context> values_transposed_buffer_;
+  Tensor<Context> indices_transposed_buffer_;
 
   // Shape tensors on device for CUDAContext.
-  TensorCUDA input_dims_device_;
-  TensorCUDA input_transposed_dims_device_;
-  TensorCUDA input_axes_device_;
+  Tensor<Context> input_dims_device_;
+  Tensor<Context> input_transposed_dims_device_;
+  Tensor<Context> input_axes_device_;
 
-  TensorCUDA output_dims_device_;
-  TensorCUDA output_transposed_dims_device_;
-  TensorCUDA output_transposed_axes_device_;
+  Tensor<Context> output_dims_device_;
+  Tensor<Context> output_transposed_dims_device_;
+  Tensor<Context> output_transposed_axes_device_;
 };
 
-template <typename T>
-bool TopKOp<T, CUDAContext>::RunOnDevice() {
+template <typename T, typename Context>
+bool TopKCudaOp<T, Context>::RunOnDevice() {
   const auto& input = Input(0);
   auto* values = Output(0);
   auto* indices = Output(1);
@@ -234,11 +234,11 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
         dims.data(),
         axes.data(),
         input.template data<T>(),
-        input_transposed_buffer_.mutable_data<T>(),
+        input_transposed_buffer_.template mutable_data<T>(),
         &context_);
-    input_data = input_transposed_buffer_.data<T>();
-    values_data = values_transposed_buffer_.mutable_data<T>();
-    indices_data = indices_transposed_buffer_.mutable_data<TIndex>();
+    input_data = input_transposed_buffer_.template data<T>();
+    values_data = values_transposed_buffer_.template mutable_data<T>();
+    indices_data = indices_transposed_buffer_.template mutable_data<TIndex>();
   }
   RunTopKOnLastDimCUDAImpl<T>(
       input_data,
@@ -256,14 +256,14 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
         3,
         dims.data(),
         axes.data(),
-        values_transposed_buffer_.data<T>(),
+        values_transposed_buffer_.template data<T>(),
         values->template mutable_data<T>(),
         &context_);
     math::Transpose(
         3,
         dims.data(),
         axes.data(),
-        indices_transposed_buffer_.data<TIndex>(),
+        indices_transposed_buffer_.template data<TIndex>(),
         indices->template mutable_data<TIndex>(),
         &context_);
   }
@@ -285,18 +285,18 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(TopK, TopKOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(TopK, TopKCudaOp<float, CUDAContext>);
 
-template <typename T>
-class TopKGradientOp<T, CUDAContext> : public Operator<CUDAContext> {
+template <typename T, typename Context>
+class TopKGradientCudaOp : public Operator<Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  TopKGradientOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CUDAContext>(operator_def, ws),
+  TopKGradientCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
         OP_SINGLE_ARG(int, "axis", axis_, -1) {}
 
-  ~TopKGradientOp(){};
+  ~TopKGradientCudaOp(){};
 
   bool RunOnDevice() override;
 
@@ -304,8 +304,8 @@ class TopKGradientOp<T, CUDAContext> : public Operator<CUDAContext> {
   int axis_;
 };
 
-template <typename T>
-bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
+template <typename T, typename Context>
+bool TopKGradientCudaOp<T, Context>::RunOnDevice() {
   const auto& values = Input(0);
   const auto& indices = Input(1);
   const auto& original_input = Input(2);
@@ -319,7 +319,7 @@ bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
     axis_ = values_dims.size() - 1;
   }
   const int k = values_dims[axis_];
-  math::Set<T, CUDAContext>(output->size(), T(0), output_data, &context_);
+  math::Set<T, Context>(output->size(), T(0), output_data, &context_);
   const TIndex stride = std::accumulate(
       values_dims.cbegin() + axis_ + 1,
       values_dims.cend(),
@@ -340,6 +340,6 @@ bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(TopKGradient, TopKGradientOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(TopKGradient, TopKGradientCudaOp<float, CUDAContext>);
 
 } // namespace caffe2
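One side effect of turning the TensorCUDA members into Tensor<Context> is that the buffers now have a type that depends on a template parameter, so calls to their member templates need the explicit `template` keyword; that is all the `.data<T>()` to `.template data<T>()` edits above are doing. A minimal illustration with a toy Tensor (not caffe2's):

// Toy illustration of why the diff inserts ".template" before data<T>():
// on a dependent type, "buf.data<T>()" is parsed as comparisons unless the
// member template call is disambiguated.
template <typename Context>
struct Tensor {
  template <typename T>
  T* data() { return nullptr; }
};

template <typename T, typename Context>
void Example(Tensor<Context>& buf) {
  // T* p = buf.data<T>();        // ill-formed: data is a dependent member template
  T* p = buf.template data<T>();  // OK: explicit disambiguation
  (void)p;
}

template void Example<float, int>(Tensor<int>&);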
@@ -41,7 +41,7 @@ class TransposeOp final : public Operator<Context> {
       CAFFE_ENFORCE_EQ(ndim, axes_.size());
     }
     // Do the actual transpose, which is implemented in DoRunWithType().
-    return DispatchHelper<TensorTypes<float, double, int, long>>::call(
+    return DispatchHelper<TensorTypes<float, double, int, TIndex>>::call(
         this, Input(0));
   }
 
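The `long` to `TIndex` change here (and the matching CAFFE2_SPECIALIZED_TRANSPOSE edits below) most likely addresses the data-model difference between platforms: caffe2's TIndex is a 64-bit index type, which `long` happens to match on LP64 Linux/macOS but not on LLP64 Windows, where `long` is 32 bits. A small hedged illustration, with TIndex modeled as int64_t:

// Hedged illustration of the long -> TIndex switch. TIndex is modeled as
// int64_t here; whether "long" names the same type is platform dependent,
// which is exactly the trap the diff removes from the dispatch list.
#include <cstdint>
#include <cstdio>
#include <type_traits>

using TIndex = std::int64_t;  // stand-in for caffe2::TIndex

int main() {
  // LP64 (Linux/macOS): long is 64-bit and typically the same type as
  // int64_t, so code instantiated for "long" happens to cover TIndex.
  // LLP64 (Windows/MSVC): long is 32-bit and int64_t is long long, so a
  // "long" instantiation leaves 64-bit index tensors uncovered.
  std::printf("sizeof(long) = %zu\n", sizeof(long));
  std::printf("long is TIndex: %d\n",
              static_cast<int>(std::is_same<long, TIndex>::value));
  return 0;
}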
@@ -2169,7 +2169,7 @@ void Transpose<float, CPUContext>(
 }
 CAFFE2_SPECIALIZED_TRANSPOSE(double)
 CAFFE2_SPECIALIZED_TRANSPOSE(int)
-CAFFE2_SPECIALIZED_TRANSPOSE(long)
+CAFFE2_SPECIALIZED_TRANSPOSE(TIndex)
 #undef CAFFE2_SPECIALIZED_TRANSPOSE
 
 } // namespace math
@@ -3001,7 +3001,7 @@ void TransposeCUDA(
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(float)
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(double)
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(int)
-CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(long)
+CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(TIndex)
 #undef CAFFE2_SPECIALIZED_CUDA_TRANSPOSE
 
 } // namespace math
@@ -51,6 +51,7 @@ cmake .. ^
   -DBUILD_TEST=OFF ^
   -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
   -DUSE_CUDA=%USE_CUDA% ^
+  -DCUDA_ARCH_NAME=Maxwell ^
   -DUSE_NNPACK=OFF ^
   -DUSE_CUB=OFF ^
   -DUSE_GLOG=OFF ^
@@ -64,7 +65,7 @@ cmake .. ^
   || goto :label_error
 
 :: Actually run the build
-cmake --build . --config %CMAKE_BUILD_TYPE% || goto :label_error
+cmake --build . --config %CMAKE_BUILD_TYPE% -- /maxcpucount:%NUMBER_OF_PROCESSORS% || goto :label_error
 
 echo "Caffe2 built successfully"
 cd %ORIGINAL_DIR%
third_party/eigen (vendored submodule)
@@ -1 +1 @@
-Subproject commit 5a0ab9ff4e258b860470afe36e83a3e88b3ce14c
+Subproject commit e9e95489a0b241412e31f0525e85b2fab386c786