Fix caffe2 eigen + cuda9 windows build (#6746)

commit 0d0dcde5a8
parent 4e8e13d90c
Author: bddppq
Date: 2018-04-22 09:36:09 -07:00
Committed by: Xiaomeng Yang

8 changed files with 64 additions and 63 deletions


@@ -45,13 +45,13 @@ __global__ void ComputeArgCUDAKernel(
 } // namespace

-template <typename T>
-class ArgMaxOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ArgMaxCudaOp final : public ArgOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);

-  ArgMaxOp(const OperatorDef& operator_def, Workspace* ws)
-      : ArgOpBase<T, CUDAContext>(operator_def, ws) {}
+  ArgMaxCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ArgOpBase<T, Context>(operator_def, ws) {}

  protected:
   bool Compute(
@@ -62,8 +62,8 @@ class ArgMaxOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
       TIndex* Y) override;
 };

-template <typename T>
-bool ArgMaxOp<T, CUDAContext>::Compute(
+template <typename T, typename Context>
+bool ArgMaxCudaOp<T, Context>::Compute(
     const T* X,
     const TIndex prev_size,
     const TIndex next_size,
@@ -85,13 +85,13 @@ bool ArgMaxOp<T, CUDAContext>::Compute(
   return true;
 }

-template <typename T>
-class ArgMinOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ArgMinCudaOp final : public ArgOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);

-  ArgMinOp(const OperatorDef& operator_def, Workspace* ws)
-      : ArgOpBase<T, CUDAContext>(operator_def, ws) {}
+  ArgMinCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ArgOpBase<T, Context>(operator_def, ws) {}

  protected:
   bool Compute(
@@ -102,8 +102,8 @@ class ArgMinOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
       TIndex* Y) override;
 };

-template <typename T>
-bool ArgMinOp<T, CUDAContext>::Compute(
+template <typename T, typename Context>
+bool ArgMinCudaOp<T, Context>::Compute(
     const T* X,
     const TIndex prev_size,
     const TIndex next_size,
@@ -125,7 +125,7 @@ bool ArgMinOp<T, CUDAContext>::Compute(
   return true;
 }

-REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR(ArgMin, ArgMinOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxCudaOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(ArgMin, ArgMinCudaOp<float, CUDAContext>);

 } // namespace caffe2
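Note: the hunks above replace a partial specialization (ArgMaxOp<T, CUDAContext>) with a separately named, fully generic template (ArgMaxCudaOp<T, Context>) that is pinned to CUDAContext only at the registration site. Below is a minimal sketch of that pattern; MyOp, MyCudaOp, and the two context structs are illustrative stand-ins, not caffe2 declarations, and the assumed motivation is a compiler front end (here, nvcc driving MSVC) that mishandles the specialized form.

// Sketch of the rename-instead-of-specialize workaround.
#include <cstdio>

struct CPUContext {};
struct CUDAContext {};

// Form (a): primary template plus a partial specialization on the context
// type. This is the shape of code the commit removes.
template <typename T, typename Context>
class MyOp {};
// template <typename T>
// class MyOp<T, CUDAContext> { /* CUDA-specific body */ };

// Form (b): a distinct, fully generic template with a new name. Nothing is
// partially specialized; the context is fixed only where the operator is
// registered, mirroring
// REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxCudaOp<float, CUDAContext>);
template <typename T, typename Context>
class MyCudaOp {
 public:
  bool Run() {
    std::puts("CUDA-path body, instantiated with Context = CUDAContext");
    return true;
  }
};

int main() {
  MyCudaOp<float, CUDAContext> op; // stands in for the registration site
  return op.Run() ? 0 : 1;
}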


@@ -180,14 +180,14 @@ void ComputeReduceMinMaxGradientCUDA(
 } // namespace

-template <typename T>
-class ReduceMinMaxGradientOp<T, CUDAContext> final
-    : public ReduceGradientOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ReduceMinMaxGradientCudaOp final
+    : public ReduceGradientOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);

-  ReduceMinMaxGradientOp(const OperatorDef& operator_def, Workspace* ws)
-      : ReduceGradientOpBase<T, CUDAContext>(operator_def, ws) {}
+  ReduceMinMaxGradientCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ReduceGradientOpBase<T, Context>(operator_def, ws) {}

  protected:
   bool Compute(
@@ -207,12 +207,12 @@ class ReduceMinMaxGradientOp<T, CUDAContext> final
 REGISTER_CUDA_OPERATOR(ReduceMin, ReduceMinOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ReduceMinGradient,
-    ReduceMinMaxGradientOp<float, CUDAContext>);
+    ReduceMinMaxGradientCudaOp<float, CUDAContext>);

 REGISTER_CUDA_OPERATOR(ReduceMax, ReduceMaxOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ReduceMaxGradient,
-    ReduceMinMaxGradientOp<float, CUDAContext>);
+    ReduceMinMaxGradientCudaOp<float, CUDAContext>);

 REGISTER_CUDA_OPERATOR(ReduceSum, ReduceSumOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(


@@ -139,19 +139,19 @@ __global__ void SetTopKGradientCUDAKernel(
 } // namespace

-template <typename T>
-class TopKOp<T, CUDAContext> : public Operator<CUDAContext> {
+template <typename T, typename Context>
+class TopKCudaOp : public Operator<Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);

-  TopKOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CUDAContext>(operator_def, ws),
+  TopKCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
         OP_SINGLE_ARG(int, "k", k_, -1),
         OP_SINGLE_ARG(int, "axis", axis_, -1) {
     CAFFE_ENFORCE(k_ >= 1, "k argument must be >= 1");
   }

-  ~TopKOp(){};
+  ~TopKCudaOp(){};

   bool RunOnDevice() override;
@@ -160,22 +160,22 @@ class TopKOp<T, CUDAContext> : public Operator<CUDAContext> {
   int axis_;

   // Buffers for CUDAContext.
-  TensorCUDA input_transposed_buffer_;
-  TensorCUDA values_transposed_buffer_;
-  TensorCUDA indices_transposed_buffer_;
+  Tensor<Context> input_transposed_buffer_;
+  Tensor<Context> values_transposed_buffer_;
+  Tensor<Context> indices_transposed_buffer_;

   // Shape tensors on device for CUDAContext.
-  TensorCUDA input_dims_device_;
-  TensorCUDA input_transposed_dims_device_;
-  TensorCUDA input_axes_device_;
+  Tensor<Context> input_dims_device_;
+  Tensor<Context> input_transposed_dims_device_;
+  Tensor<Context> input_axes_device_;

-  TensorCUDA output_dims_device_;
-  TensorCUDA output_transposed_dims_device_;
-  TensorCUDA output_transposed_axes_device_;
+  Tensor<Context> output_dims_device_;
+  Tensor<Context> output_transposed_dims_device_;
+  Tensor<Context> output_transposed_axes_device_;
 };

-template <typename T>
-bool TopKOp<T, CUDAContext>::RunOnDevice() {
+template <typename T, typename Context>
+bool TopKCudaOp<T, Context>::RunOnDevice() {
   const auto& input = Input(0);
   auto* values = Output(0);
   auto* indices = Output(1);
@@ -234,11 +234,11 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
         dims.data(),
         axes.data(),
         input.template data<T>(),
-        input_transposed_buffer_.mutable_data<T>(),
+        input_transposed_buffer_.template mutable_data<T>(),
         &context_);
-    input_data = input_transposed_buffer_.data<T>();
-    values_data = values_transposed_buffer_.mutable_data<T>();
-    indices_data = indices_transposed_buffer_.mutable_data<TIndex>();
+    input_data = input_transposed_buffer_.template data<T>();
+    values_data = values_transposed_buffer_.template mutable_data<T>();
+    indices_data = indices_transposed_buffer_.template mutable_data<TIndex>();
   }
   RunTopKOnLastDimCUDAImpl<T>(
       input_data,
@@ -256,14 +256,14 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
         3,
         dims.data(),
         axes.data(),
-        values_transposed_buffer_.data<T>(),
+        values_transposed_buffer_.template data<T>(),
         values->template mutable_data<T>(),
         &context_);
     math::Transpose(
         3,
         dims.data(),
         axes.data(),
-        indices_transposed_buffer_.data<TIndex>(),
+        indices_transposed_buffer_.template data<TIndex>(),
         indices->template mutable_data<TIndex>(),
         &context_);
   }
@@ -285,18 +285,18 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
   return true;
 }

-REGISTER_CUDA_OPERATOR(TopK, TopKOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(TopK, TopKCudaOp<float, CUDAContext>);

-template <typename T>
-class TopKGradientOp<T, CUDAContext> : public Operator<CUDAContext> {
+template <typename T, typename Context>
+class TopKGradientCudaOp : public Operator<Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);

-  TopKGradientOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CUDAContext>(operator_def, ws),
+  TopKGradientCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
         OP_SINGLE_ARG(int, "axis", axis_, -1) {}

-  ~TopKGradientOp(){};
+  ~TopKGradientCudaOp(){};

   bool RunOnDevice() override;
@@ -304,8 +304,8 @@ class TopKGradientOp<T, CUDAContext> : public Operator<CUDAContext> {
   int axis_;
 };

-template <typename T>
-bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
+template <typename T, typename Context>
+bool TopKGradientCudaOp<T, Context>::RunOnDevice() {
   const auto& values = Input(0);
   const auto& indices = Input(1);
   const auto& original_input = Input(2);
@@ -319,7 +319,7 @@ bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
     axis_ = values_dims.size() - 1;
   }
   const int k = values_dims[axis_];
-  math::Set<T, CUDAContext>(output->size(), T(0), output_data, &context_);
+  math::Set<T, Context>(output->size(), T(0), output_data, &context_);
   const TIndex stride = std::accumulate(
       values_dims.cbegin() + axis_ + 1,
       values_dims.cend(),
@@ -340,6 +340,6 @@ bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
   return true;
 }

-REGISTER_CUDA_OPERATOR(TopKGradient, TopKGradientOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(TopKGradient, TopKGradientCudaOp<float, CUDAContext>);

 } // namespace caffe2
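A side effect of turning the buffers into Tensor<Context> is visible throughout the hunks above: every data<T>() / mutable_data<T>() call now needs the template keyword, because the buffer's type depends on a template parameter, so the compiler cannot otherwise tell that data and mutable_data are member templates. A minimal sketch of that rule; Tensor and Fill here are stand-ins, not caffe2's actual classes.

// Why the diff adds ".template" before mutable_data<T>() and data<T>().
template <typename Context>
struct Tensor {
  template <typename T>
  T* mutable_data() { return nullptr; }
};

template <typename T, typename Context>
void Fill(Tensor<Context>& buf) {
  // T* p = buf.mutable_data<T>();       // parsed as (buf.mutable_data < T) > ()
  T* p = buf.template mutable_data<T>(); // OK: dependent member template call
  (void)p;
}

struct CUDAContext {};

int main() {
  Tensor<CUDAContext> t;
  Fill<float>(t); // Context deduced as CUDAContext
  return 0;
}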


@@ -41,7 +41,7 @@ class TransposeOp final : public Operator<Context> {
       CAFFE_ENFORCE_EQ(ndim, axes_.size());
     }
     // Do the actual transpose, which is implemented in DoRunWithType().
-    return DispatchHelper<TensorTypes<float, double, int, long>>::call(
+    return DispatchHelper<TensorTypes<float, double, int, TIndex>>::call(
         this, Input(0));
   }


@@ -2169,7 +2169,7 @@ void Transpose<float, CPUContext>(
 }
 CAFFE2_SPECIALIZED_TRANSPOSE(double)
 CAFFE2_SPECIALIZED_TRANSPOSE(int)
-CAFFE2_SPECIALIZED_TRANSPOSE(long)
+CAFFE2_SPECIALIZED_TRANSPOSE(TIndex)
 #undef CAFFE2_SPECIALIZED_TRANSPOSE

 } // namespace math


@@ -3001,7 +3001,7 @@ void TransposeCUDA(
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(float)
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(double)
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(int)
-CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(long)
+CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(TIndex)
 #undef CAFFE2_SPECIALIZED_CUDA_TRANSPOSE

 } // namespace math
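The long-to-TIndex change in the two transpose hunks above, together with the matching TensorTypes change in TransposeOp, keeps the dispatch list and the specializations aligned with caffe2's 64-bit index type across data models: on LP64 Linux, long is already 64-bit, but under MSVC's LLP64 model long is 32-bit, so a specialization on long never covers int64_t indices on Windows. A small standalone check of that size mismatch, assuming only that TIndex is int64_t:

// Prints how long relates to int64_t on the current platform.
#include <cstdint>
#include <cstdio>
#include <type_traits>

using TIndex = std::int64_t; // caffe2's index type

int main() {
  std::printf("sizeof(long) = %zu, sizeof(TIndex) = %zu\n",
              sizeof(long), sizeof(TIndex));
  // true on LP64 Linux, false on LLP64 Windows:
  std::printf("long is int64_t: %d\n",
              (int)std::is_same<long, std::int64_t>::value);
  return 0;
}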


@@ -51,6 +51,7 @@ cmake .. ^
   -DBUILD_TEST=OFF ^
   -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
   -DUSE_CUDA=%USE_CUDA% ^
+  -DCUDA_ARCH_NAME=Maxwell ^
   -DUSE_NNPACK=OFF ^
   -DUSE_CUB=OFF ^
   -DUSE_GLOG=OFF ^
@@ -64,7 +65,7 @@ cmake .. ^
   || goto :label_error

 :: Actually run the build
-cmake --build . --config %CMAKE_BUILD_TYPE% || goto :label_error
+cmake --build . --config %CMAKE_BUILD_TYPE% -- /maxcpucount:%NUMBER_OF_PROCESSORS% || goto :label_error

 echo "Caffe2 built successfully"
 cd %ORIGINAL_DIR%

third_party/eigen (vendored submodule)

@@ -1 +1 @@
-Subproject commit 5a0ab9ff4e258b860470afe36e83a3e88b3ce14c
+Subproject commit e9e95489a0b241412e31f0525e85b2fab386c786