Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Fix caffe2 eigen + cuda9 windows build (#6746)
This commit is contained in:
parent 4e8e13d90c
commit 0d0dcde5a8
@@ -45,13 +45,13 @@ __global__ void ComputeArgCUDAKernel(
 
 } // namespace
 
-template <typename T>
-class ArgMaxOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ArgMaxCudaOp final : public ArgOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  ArgMaxOp(const OperatorDef& operator_def, Workspace* ws)
-      : ArgOpBase<T, CUDAContext>(operator_def, ws) {}
+  ArgMaxCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ArgOpBase<T, Context>(operator_def, ws) {}
 
  protected:
   bool Compute(
@@ -62,8 +62,8 @@ class ArgMaxOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
       TIndex* Y) override;
 };
 
-template <typename T>
-bool ArgMaxOp<T, CUDAContext>::Compute(
+template <typename T, typename Context>
+bool ArgMaxCudaOp<T, Context>::Compute(
     const T* X,
     const TIndex prev_size,
     const TIndex next_size,
@@ -85,13 +85,13 @@ bool ArgMaxOp<T, CUDAContext>::Compute(
   return true;
 }
 
-template <typename T>
-class ArgMinOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ArgMinCudaOp final : public ArgOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  ArgMinOp(const OperatorDef& operator_def, Workspace* ws)
-      : ArgOpBase<T, CUDAContext>(operator_def, ws) {}
+  ArgMinCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ArgOpBase<T, Context>(operator_def, ws) {}
 
  protected:
   bool Compute(
@@ -102,8 +102,8 @@ class ArgMinOp<T, CUDAContext> final : public ArgOpBase<T, CUDAContext> {
       TIndex* Y) override;
 };
 
-template <typename T>
-bool ArgMinOp<T, CUDAContext>::Compute(
+template <typename T, typename Context>
+bool ArgMinCudaOp<T, Context>::Compute(
     const T* X,
     const TIndex prev_size,
     const TIndex next_size,
@@ -125,7 +125,7 @@ bool ArgMinOp<T, CUDAContext>::Compute(
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxOp<float, CUDAContext>);
-REGISTER_CUDA_OPERATOR(ArgMin, ArgMinOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxCudaOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(ArgMin, ArgMinCudaOp<float, CUDAContext>);
 
 } // namespace caffe2
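The pattern in this file, repeated for the reduce and top-k operators below, is to drop the partial specialization of the operator template on CUDAContext and introduce a separately named class template whose Context parameter is only bound to CUDAContext at registration time; presumably the MSVC + CUDA 9 toolchain rejected the specialized form. A minimal standalone sketch of the two shapes follows (OpBase, CUDAContext, and the explicit instantiation are simplified stand-ins, not the real caffe2 declarations):

// Hedged sketch of the refactoring pattern applied in this commit: replace a
// partial specialization on CUDAContext with an independently named class
// template. Everything here is a simplified stand-in for the caffe2 types.
struct CUDAContext {};

template <typename T, typename Context>
class OpBase {
 public:
  virtual ~OpBase() = default;
};

// Before (removed by the diff): a partial specialization such as
//   template <typename T>
//   class ArgMaxOp<T, CUDAContext> final : public OpBase<T, CUDAContext> {...};
// which the Windows CUDA 9 build apparently failed to compile.

// After: an ordinary class template; CUDA only appears when it is instantiated.
template <typename T, typename Context>
class ArgMaxCudaOp final : public OpBase<T, Context> {
 public:
  bool Compute() { return true; }  // the real operator launches a CUDA kernel
};

// Registration then names the CUDA instantiation explicitly, matching
// REGISTER_CUDA_OPERATOR(ArgMax, ArgMaxCudaOp<float, CUDAContext>) above.
template class ArgMaxCudaOp<float, CUDAContext>;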
@@ -180,14 +180,14 @@ void ComputeReduceMinMaxGradientCUDA(
 
 } // namespace
 
-template <typename T>
-class ReduceMinMaxGradientOp<T, CUDAContext> final
-    : public ReduceGradientOpBase<T, CUDAContext> {
+template <typename T, typename Context>
+class ReduceMinMaxGradientCudaOp final
+    : public ReduceGradientOpBase<T, Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  ReduceMinMaxGradientOp(const OperatorDef& operator_def, Workspace* ws)
-      : ReduceGradientOpBase<T, CUDAContext>(operator_def, ws) {}
+  ReduceMinMaxGradientCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : ReduceGradientOpBase<T, Context>(operator_def, ws) {}
 
  protected:
   bool Compute(
@@ -207,12 +207,12 @@ class ReduceMinMaxGradientOp<T, CUDAContext> final
 REGISTER_CUDA_OPERATOR(ReduceMin, ReduceMinOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ReduceMinGradient,
-    ReduceMinMaxGradientOp<float, CUDAContext>);
+    ReduceMinMaxGradientCudaOp<float, CUDAContext>);
 
 REGISTER_CUDA_OPERATOR(ReduceMax, ReduceMaxOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
     ReduceMaxGradient,
-    ReduceMinMaxGradientOp<float, CUDAContext>);
+    ReduceMinMaxGradientCudaOp<float, CUDAContext>);
 
 REGISTER_CUDA_OPERATOR(ReduceSum, ReduceSumOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(
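Only the C++ class name changes in this hunk; the registered operator names (ReduceMinGradient, ReduceMaxGradient) stay the same, so serialized nets keep resolving to the same operators. A toy string-keyed registry, not caffe2's actual OperatorRegistry, just to illustrate why the rename is invisible to net definitions:

// Toy string -> factory registry, illustrating why renaming the C++ class
// (ReduceMinMaxGradientOp -> ReduceMinMaxGradientCudaOp) does not change the
// operator name that serialized nets refer to. Not caffe2's real registry.
#include <functional>
#include <map>
#include <memory>
#include <string>

struct OperatorBase {
  virtual ~OperatorBase() = default;
};

using Factory = std::function<std::unique_ptr<OperatorBase>()>;

std::map<std::string, Factory>& Registry() {
  static std::map<std::string, Factory> r;
  return r;
}

template <typename Op>
void RegisterCudaOperator(const std::string& name) {
  Registry()[name] = [] { return std::unique_ptr<OperatorBase>(new Op()); };
}

struct ReduceMinMaxGradientCudaOp : OperatorBase {};

int main() {
  // The lookup key is the string, so nets keep working after the rename.
  RegisterCudaOperator<ReduceMinMaxGradientCudaOp>("ReduceMinGradient");
  auto op = Registry()["ReduceMinGradient"]();
  return op != nullptr ? 0 : 1;
}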
@@ -139,19 +139,19 @@ __global__ void SetTopKGradientCUDAKernel(
 
 } // namespace
 
-template <typename T>
-class TopKOp<T, CUDAContext> : public Operator<CUDAContext> {
+template <typename T, typename Context>
+class TopKCudaOp : public Operator<Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  TopKOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CUDAContext>(operator_def, ws),
+  TopKCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
         OP_SINGLE_ARG(int, "k", k_, -1),
         OP_SINGLE_ARG(int, "axis", axis_, -1) {
     CAFFE_ENFORCE(k_ >= 1, "k argument must be >= 1");
   }
 
-  ~TopKOp(){};
+  ~TopKCudaOp(){};
 
   bool RunOnDevice() override;
 
@@ -160,22 +160,22 @@ class TopKOp<T, CUDAContext> : public Operator<CUDAContext> {
   int axis_;
 
   // Buffers for CUDAContext.
-  TensorCUDA input_transposed_buffer_;
-  TensorCUDA values_transposed_buffer_;
-  TensorCUDA indices_transposed_buffer_;
+  Tensor<Context> input_transposed_buffer_;
+  Tensor<Context> values_transposed_buffer_;
+  Tensor<Context> indices_transposed_buffer_;
 
   // Shape tensors on device for CUDAContext.
-  TensorCUDA input_dims_device_;
-  TensorCUDA input_transposed_dims_device_;
-  TensorCUDA input_axes_device_;
+  Tensor<Context> input_dims_device_;
+  Tensor<Context> input_transposed_dims_device_;
+  Tensor<Context> input_axes_device_;
 
-  TensorCUDA output_dims_device_;
-  TensorCUDA output_transposed_dims_device_;
-  TensorCUDA output_transposed_axes_device_;
+  Tensor<Context> output_dims_device_;
+  Tensor<Context> output_transposed_dims_device_;
+  Tensor<Context> output_transposed_axes_device_;
 };
 
-template <typename T>
-bool TopKOp<T, CUDAContext>::RunOnDevice() {
+template <typename T, typename Context>
+bool TopKCudaOp<T, Context>::RunOnDevice() {
   const auto& input = Input(0);
   auto* values = Output(0);
   auto* indices = Output(1);
@@ -234,11 +234,11 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
         dims.data(),
         axes.data(),
         input.template data<T>(),
-        input_transposed_buffer_.mutable_data<T>(),
+        input_transposed_buffer_.template mutable_data<T>(),
         &context_);
-    input_data = input_transposed_buffer_.data<T>();
-    values_data = values_transposed_buffer_.mutable_data<T>();
-    indices_data = indices_transposed_buffer_.mutable_data<TIndex>();
+    input_data = input_transposed_buffer_.template data<T>();
+    values_data = values_transposed_buffer_.template mutable_data<T>();
+    indices_data = indices_transposed_buffer_.template mutable_data<TIndex>();
   }
   RunTopKOnLastDimCUDAImpl<T>(
       input_data,
@@ -256,14 +256,14 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
         3,
         dims.data(),
         axes.data(),
-        values_transposed_buffer_.data<T>(),
+        values_transposed_buffer_.template data<T>(),
         values->template mutable_data<T>(),
         &context_);
     math::Transpose(
         3,
         dims.data(),
         axes.data(),
-        indices_transposed_buffer_.data<TIndex>(),
+        indices_transposed_buffer_.template data<TIndex>(),
         indices->template mutable_data<TIndex>(),
         &context_);
   }
@@ -285,18 +285,18 @@ bool TopKOp<T, CUDAContext>::RunOnDevice() {
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(TopK, TopKOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(TopK, TopKCudaOp<float, CUDAContext>);
 
-template <typename T>
-class TopKGradientOp<T, CUDAContext> : public Operator<CUDAContext> {
+template <typename T, typename Context>
+class TopKGradientCudaOp : public Operator<Context> {
  public:
-  USE_OPERATOR_FUNCTIONS(CUDAContext);
+  USE_OPERATOR_FUNCTIONS(Context);
 
-  TopKGradientOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator<CUDAContext>(operator_def, ws),
+  TopKGradientCudaOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws),
         OP_SINGLE_ARG(int, "axis", axis_, -1) {}
 
-  ~TopKGradientOp(){};
+  ~TopKGradientCudaOp(){};
 
   bool RunOnDevice() override;
 
@@ -304,8 +304,8 @@ class TopKGradientOp<T, CUDAContext> : public Operator<CUDAContext> {
   int axis_;
 };
 
-template <typename T>
-bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
+template <typename T, typename Context>
+bool TopKGradientCudaOp<T, Context>::RunOnDevice() {
   const auto& values = Input(0);
   const auto& indices = Input(1);
   const auto& original_input = Input(2);
@@ -319,7 +319,7 @@ bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
     axis_ = values_dims.size() - 1;
   }
   const int k = values_dims[axis_];
-  math::Set<T, CUDAContext>(output->size(), T(0), output_data, &context_);
+  math::Set<T, Context>(output->size(), T(0), output_data, &context_);
   const TIndex stride = std::accumulate(
       values_dims.cbegin() + axis_ + 1,
       values_dims.cend(),
@@ -340,6 +340,6 @@ bool TopKGradientOp<T, CUDAContext>::RunOnDevice() {
   return true;
 }
 
-REGISTER_CUDA_OPERATOR(TopKGradient, TopKGradientOp<float, CUDAContext>);
+REGISTER_CUDA_OPERATOR(TopKGradient, TopKGradientCudaOp<float, CUDAContext>);
 
 } // namespace caffe2
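One side effect of turning the TensorCUDA members into Tensor<Context> is that the buffers now have a type that depends on a template parameter, so calls to their member templates need the explicit `template` keyword; that is all the `.data<T>()` to `.template data<T>()` edits above are doing. A minimal illustration with a toy Tensor (not caffe2's):

// Toy illustration of why the diff inserts ".template" before data<T>():
// on a dependent type, "buf.data<T>()" is parsed as comparisons unless the
// member template call is disambiguated.
template <typename Context>
struct Tensor {
  template <typename T>
  T* data() { return nullptr; }
};

template <typename T, typename Context>
void Example(Tensor<Context>& buf) {
  // T* p = buf.data<T>();        // ill-formed: data is a dependent member template
  T* p = buf.template data<T>();  // OK: explicit disambiguation
  (void)p;
}

template void Example<float, int>(Tensor<int>&);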
@@ -41,7 +41,7 @@ class TransposeOp final : public Operator<Context> {
       CAFFE_ENFORCE_EQ(ndim, axes_.size());
     }
     // Do the actual transpose, which is implemented in DoRunWithType().
-    return DispatchHelper<TensorTypes<float, double, int, long>>::call(
+    return DispatchHelper<TensorTypes<float, double, int, TIndex>>::call(
         this, Input(0));
   }
 
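The `long` to `TIndex` change here (and the matching CAFFE2_SPECIALIZED_TRANSPOSE edits below) most likely addresses the data-model difference between platforms: caffe2's TIndex is a 64-bit index type, which `long` happens to match on LP64 Linux/macOS but not on LLP64 Windows, where `long` is 32 bits. A small hedged illustration, with TIndex modeled as int64_t:

// Hedged illustration of the long -> TIndex switch. TIndex is modeled as
// int64_t here; whether "long" names the same type is platform dependent,
// which is exactly the trap the diff removes from the dispatch list.
#include <cstdint>
#include <cstdio>
#include <type_traits>

using TIndex = std::int64_t;  // stand-in for caffe2::TIndex

int main() {
  // LP64 (Linux/macOS): long is 64-bit and typically the same type as
  // int64_t, so code instantiated for "long" happens to cover TIndex.
  // LLP64 (Windows/MSVC): long is 32-bit and int64_t is long long, so a
  // "long" instantiation leaves 64-bit index tensors uncovered.
  std::printf("sizeof(long) = %zu\n", sizeof(long));
  std::printf("long is TIndex: %d\n",
              static_cast<int>(std::is_same<long, TIndex>::value));
  return 0;
}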
@@ -2169,7 +2169,7 @@ void Transpose<float, CPUContext>(
 }
 CAFFE2_SPECIALIZED_TRANSPOSE(double)
 CAFFE2_SPECIALIZED_TRANSPOSE(int)
-CAFFE2_SPECIALIZED_TRANSPOSE(long)
+CAFFE2_SPECIALIZED_TRANSPOSE(TIndex)
 #undef CAFFE2_SPECIALIZED_TRANSPOSE
 
 } // namespace math
@@ -3001,7 +3001,7 @@ void TransposeCUDA(
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(float)
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(double)
 CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(int)
-CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(long)
+CAFFE2_SPECIALIZED_CUDA_TRANSPOSE(TIndex)
 #undef CAFFE2_SPECIALIZED_CUDA_TRANSPOSE
 
 } // namespace math
@@ -51,6 +51,7 @@ cmake .. ^
   -DBUILD_TEST=OFF ^
   -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
   -DUSE_CUDA=%USE_CUDA% ^
+  -DCUDA_ARCH_NAME=Maxwell ^
   -DUSE_NNPACK=OFF ^
   -DUSE_CUB=OFF ^
   -DUSE_GLOG=OFF ^
@@ -64,7 +65,7 @@ cmake .. ^
   || goto :label_error
 
 :: Actually run the build
-cmake --build . --config %CMAKE_BUILD_TYPE% || goto :label_error
+cmake --build . --config %CMAKE_BUILD_TYPE% -- /maxcpucount:%NUMBER_OF_PROCESSORS% || goto :label_error
 
 echo "Caffe2 built successfully"
 cd %ORIGINAL_DIR%
third_party/eigen (vendored submodule)
@@ -1 +1 @@
-Subproject commit 5a0ab9ff4e258b860470afe36e83a3e88b3ce14c
+Subproject commit e9e95489a0b241412e31f0525e85b2fab386c786