Tensor construction: combine Resize+mutable_data - 2/4 (#14205)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14205

Original commit changeset: 8f9fb55842ae

Reviewed By: dzhulgakov

Differential Revision: D13126263

fbshipit-source-id: 12ba89e31b7738a81ec5c660ea7b79e8576c35dc
parent f6354d903a
commit 2e0f3b038c
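For context, a minimal sketch of the pattern this commit applies throughout the hunks below: the old two-step construction (fetch the output with Output(idx), then Resize it, then pick the dtype at the first mutable_data call) is collapsed into a single Output(idx, sizes, at::dtype<T>()) call that creates the output with its shape and dtype up front. The operator name ExampleCopyToZerosOp and its constructor boilerplate are hypothetical, assumed from the Caffe2 Operator<Context> API visible in the diff; only the Input/Output/mutable_data/math::Set calls are taken from the commit itself.

#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

// Hypothetical operator, used only to illustrate the construction change.
template <class Context>
class ExampleCopyToZerosOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  ExampleCopyToZerosOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    const auto& input = Input(0);
    CAFFE_ENFORCE_EQ(input.dim(), 1, "Input must be a vector.");

    // Old pattern (removed by this commit): untyped, unsized output first,
    // shape and dtype supplied later by Resize + mutable_data.
    //   auto* output = Output(0);
    //   output->Resize(input.numel());
    //   auto* output_data = output->template mutable_data<float>();

    // New pattern: sizes and dtype are passed when the output is constructed;
    // mutable_data then just returns the already-allocated buffer.
    auto* output = Output(0, {input.numel()}, at::dtype<float>());
    auto* output_data = output->template mutable_data<float>();

    // Fill the output with zeros, mirroring the math::Set usage in the diff.
    math::Set<float, Context>(
        output->numel(), 0.0f, output_data, &context_);
    return true;
  }
};

} // namespace caffe2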
@@ -483,14 +483,14 @@ class LengthsRangeFillOp : public Operator<Context> {
   bool RunOnDevice() override {
     auto& input = Input(0);
-    auto* output = Output(0);
     auto* input_data = input.template data<int32_t>();
 
     CAFFE_ENFORCE_EQ(input.dim(), 1, "Input must be a vector.");
 
     auto len_sum = std::accumulate(input_data, input_data + input.numel(), 0);
 
-    output->Resize(len_sum);
+    auto* output = Output(0, {len_sum}, at::dtype<int32_t>());
     auto* output_data = output->template mutable_data<int32_t>();
 
     int32_t offset = 0;
@@ -40,8 +40,8 @@ class FindDuplicateElementsOp final : public Operator<Context> {
     }
 
     const auto dupSize = dupIndices.size();
-    auto* output = Output(0);
-    output->Resize(dupSize);
+    auto* output = Output(0, {static_cast<int64_t>(dupSize)}, at::dtype<int64_t>());
     auto* out_ptr = output->template mutable_data<int64_t>();
     for (int64_t i = 0; i < dupSize; ++i) {
       out_ptr[i] = dupIndices[i];
@@ -23,8 +23,6 @@ template <typename T, class Context>
 bool FlexibleTopKOp<T, Context>::RunOnDevice() {
   auto& input = Input(0);
   auto& k = Input(1);
-  auto* values = Output(0);
-  auto* indices = Output(1);
 
   const T* input_data = input.template data<T>();
   const int64_t* k_data = k.template data<int64_t>();
@@ -55,8 +53,8 @@ bool FlexibleTopKOp<T, Context>::RunOnDevice() {
         k_data[i]);
     output_size += k_data[i];
   }
-  values->Resize(output_size);
-  indices->Resize(output_size);
+  auto* values = Output(0, {output_size}, at::dtype<T>());
+  auto* indices = Output(1, {output_size}, at::dtype<int64_t>());
   T* values_data = values->template mutable_data<T>();
   int64_t* indices_data = indices->template mutable_data<int64_t>();
 
@@ -99,7 +97,6 @@ bool FlexibleTopKGradientOp<T, Context>::RunOnDevice() {
   auto& k = Input(1);
   auto& values = Input(2);
   auto& indices = Input(3);
-  auto* output = Output(0);
 
   const int64_t* k_data = k.template data<int64_t>();
   const T* values_data = values.template data<T>();
@@ -108,7 +105,7 @@ bool FlexibleTopKGradientOp<T, Context>::RunOnDevice() {
   // Resize output tensors to be as orignial_input size and initialized with 0
   CAFFE_ENFORCE_GT(original_input.dim(), 0);
   vector<int64_t> original_dims = original_input.sizes().vec();
-  output->Resize(original_dims);
+  auto* output = Output(0, original_dims, at::dtype<T>());
   T* output_data = output->template mutable_data<T>();
   math::Set<T, Context>(
       output->numel(), static_cast<T>(0), output_data, &context_);
@@ -34,7 +34,7 @@ class FullyConnectedOp final : public Operator<Context> {
     const auto& X = Input(0);
     const auto& W = Input(1);
     const auto& b = Input(2);
-    auto* Y = Output(0);
     CAFFE_ENFORCE(b.dim() == 1, b.dim());
     // batch size
     const auto canonical_axis = X.canonical_axis_index(axis_);
@@ -74,7 +74,7 @@ class FullyConnectedOp final : public Operator<Context> {
     DCHECK_LE(canonical_axis + 1, Y_shape_cache_.size());
     Y_shape_cache_.resize(canonical_axis + 1);
     Y_shape_cache_[canonical_axis] = N;
-    Y->Resize(Y_shape_cache_);
+    auto* Y = Output(0, Y_shape_cache_, at::dtype<T_Y>());
     CAFFE_ENFORCE(M * N == Y->numel(), dimErrorString());
 
     if (X.numel() == 0) {
@@ -34,7 +34,6 @@ class FloatToFused8BitRowwiseQuantizedOp : public Operator<Context> {
     CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
 
     const auto& input = Input(DATA_FLOAT);
-    auto* output = Output(DATA_FUSED_SCALE_BIAS_INT8);
 
     const auto input_rows = input.size(0);
     const auto input_columns = input.size(1);
@@ -48,7 +47,7 @@ class FloatToFused8BitRowwiseQuantizedOp : public Operator<Context> {
     // | number_of_columns | 4B | 4B |
     const std::vector<int64_t> output_dimensions = {input_rows,
                                                     input_columns + 8};
-    output->Resize(output_dimensions);
+    auto* output = Output(DATA_FUSED_SCALE_BIAS_INT8, output_dimensions, at::dtype<uint8_t>());
 
     const auto* input_data = input.template data<T>();
     auto* output_data = output->template mutable_data<uint8_t>();
@@ -15,7 +15,6 @@ bool FloatToFusedRandRowwiseQuantizedOp<Context>::RunOnDevice() {
   CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
 
   const auto& input = Input(DATA_FLOAT);
-  auto* output = Output(DATA_FUSED_QUANTIZED);
 
   CAFFE_ENFORCE_EQ(
       input.dim(),
@@ -40,7 +39,8 @@ bool FloatToFusedRandRowwiseQuantizedOp<Context>::RunOnDevice() {
   size_t segment_size = (input_columns + data_per_byte - 1) / data_per_byte;
   const std::vector<int64_t> output_dimensions = {
       input_rows, 10 + static_cast<int64_t>(segment_size)};
-  output->Resize(output_dimensions);
+  auto* output =
+      Output(DATA_FUSED_QUANTIZED, output_dimensions, at::dtype<uint8_t>());
 
   const auto* input_data = input.template data<float>();
   auto* output_data = output->template mutable_data<uint8_t>();
@@ -78,7 +78,7 @@ bool FusedRandRowwiseQuantizedToFloatOp<Context>::RunOnDevice() {
   CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
 
   const auto& input = Input(DATA_FUSED_QUANTIZED);
-  auto* output = Output(DATA_FLOAT);
   CAFFE_ENFORCE_EQ(input.dim(), 2, "Expect input to be a matrix.");
   CAFFE_ENFORCE_GE(
       input.numel(),
@@ -96,7 +96,7 @@ bool FusedRandRowwiseQuantizedToFloatOp<Context>::RunOnDevice() {
   const size_t output_columns = (input_columns - 10) * (8 / bitwidth) - tail;
   const std::vector<int64_t> output_dimensions = {
       input_rows, static_cast<int64_t>(output_columns)};
-  output->Resize(output_dimensions);
+  auto* output = Output(DATA_FLOAT, output_dimensions, at::dtype<float>());
   auto* output_data = output->template mutable_data<float>();
   for (size_t row = 0; row < input_rows; ++row) {
     math::decompress_and_dequantize(
@@ -21,7 +21,6 @@ class GatherFused8BitRowwiseOp : public Operator<Context> {
   bool DoRunWithType() {
     const auto& data = Input(DATA);
     const auto& indices = Input(INDICES);
-    auto* output = Output(0);
 
     CAFFE_ENFORCE_EQ(data.dim(), 2, "DATA must be a matrix");
     CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES must be a vector");
@@ -29,7 +28,7 @@ class GatherFused8BitRowwiseOp : public Operator<Context> {
     // Subtract 8 from the #columns of data for the 4 bytes for scale and 4
     // bytes for bias that we use in the fused representation (per row).
    const std::vector<int64_t> shape = {indices.size(0), data.size(1) - 8};
-    output->Resize(shape);
+    auto* output = Output(0, shape, at::dtype<float>());
 
     int block_size = shape[1];
     auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
@@ -221,8 +221,6 @@ bool GenerateProposalsOp<CPUContext>::RunOnDevice() {
   const auto& bbox_deltas = Input(1);
   const auto& im_info_tensor = Input(2);
   const auto& anchors = Input(3);
-  auto* out_rois = Output(0);
-  auto* out_rois_probs = Output(1);
 
   CAFFE_ENFORCE_EQ(scores.dim(), 4, scores.dim());
   CAFFE_ENFORCE(scores.template IsType<float>(), scores.dtype().name());
@@ -259,8 +257,8 @@ bool GenerateProposalsOp<CPUContext>::RunOnDevice() {
       im_info_tensor.size(1));
 
   const int roi_col_count = box_dim + 1;
-  out_rois->Resize(0, roi_col_count);
-  out_rois_probs->Resize(0);
+  auto* out_rois = Output(0, {0, roi_col_count}, at::dtype<float>());
+  auto* out_rois_probs = Output(1, {0}, at::dtype<float>());
 
   std::vector<ERArrXXf> im_boxes(num_images);
   std::vector<EArrXf> im_probs(num_images);
@@ -16,7 +16,7 @@ class GluOp final : public Operator<Context> {
 
   bool RunOnDevice() {
     auto& X = Input(0);
-    auto* Y = Output(0);
     vector<int64_t> Yshape;
     Yshape.insert(Yshape.end(), X.sizes().begin(), X.sizes().end());
     const int split_index = dim_ == -1 ? Yshape.size() - 1 : dim_;
@@ -29,7 +29,7 @@ class GluOp final : public Operator<Context> {
     const int M = X.size_to_dim(split_index);
     const int N = X.size_from_dim(split_index + 1);
     Yshape[split_index] = split_dim_size;
-    Y->Resize(Yshape);
+    auto* Y = Output(0, Yshape, at::dtype<T>());
     ComputeGlu(
         M,
         split_dim_size,
@@ -52,10 +52,8 @@ class GroupNormOp final : public Operator<Context> {
     T* mu_data = nullptr;
     T* rsig_data = nullptr;
     if (OutputSize() == 3) {
-      auto* mu = Output(MU);
-      auto* rsig = Output(INV_SIGMA);
-      mu->Resize(N, G);
-      rsig->Resize(N, G);
+      auto* mu = Output(MU, {N, G}, at::dtype<T>());
+      auto* rsig = Output(INV_SIGMA, {N, G}, at::dtype<T>());
       mu_data = mu->template mutable_data<T>();
       rsig_data = rsig->template mutable_data<T>();
     } else {
@@ -72,8 +72,6 @@ bool HSoftmaxOp<float, CPUContext>::RunOnDevice() {
   const auto& W = Input(1);
   const auto& b = Input(2);
   auto& label = Input(3);
-  auto* Y = Output(0);
-  auto* intermediate_output = Output(1);
 
   // Batch size
   int M = X.dim() > 1 ? X.dim32(0) : 1;
@@ -85,14 +83,14 @@ bool HSoftmaxOp<float, CPUContext>::RunOnDevice() {
   // Sum of output dimensions of all hierarchy nodes
   int N = W.dim32(0);
   CAFFE_ENFORCE_EQ(N, b.dim32(0));
-  Y->Resize(M);
+  auto* Y = Output(0, {M}, at::dtype<float>());
   auto* Ydata = Y->template mutable_data<float>();
   math::Set<float, CPUContext>(M, 0.f, Ydata, &context_);
   const auto* labeldata = label.data<int>();
 
   auto hierarchy = getHierarchyForLabels(M, labeldata, hierarchy_all_map_);
   int int_output_size = getIntermediateOutputSize(labeldata, M, hierarchy);
-  intermediate_output->Resize(int_output_size);
+  auto* intermediate_output = Output(1, {int_output_size}, at::dtype<float>());
   float* int_output_data = intermediate_output->template mutable_data<float>();
   int int_output_offset = 0;
 
@@ -341,8 +339,7 @@ bool HSoftmaxSearchOp<float, CPUContext>::RunOnDevice() {
   auto& X = Input(0);
   const auto& W = Input(1);
   const auto& b = Input(2);
-  auto* Y_names = Output(0);
-  auto* Y_scores = Output(1);
   // Batch size
   int M = X.dim() > 1 ? X.dim32(0) : 1;
   // Input feature dimension
@@ -353,8 +350,8 @@ bool HSoftmaxSearchOp<float, CPUContext>::RunOnDevice() {
   // Sum of output dimensions of all hierarchy nodes
   int N = W.dim32(0);
   CAFFE_ENFORCE(N == b.dim32(0), "mismatch between Weight and Bias.");
-  Y_names->Resize(M, top_n_);
-  Y_scores->Resize(M, top_n_);
+  auto* Y_names = Output(0, {M, top_n_}, at::dtype<string>());
+  auto* Y_scores = Output(1, {M, top_n_}, at::dtype<float>());
 
   if (bias_multiplier_.numel() != M) {
     bias_multiplier_.Resize(M);
@@ -418,10 +415,10 @@ bool HSoftmaxSearchOp<float, CPUContext>::RunOnDevice() {
 template <typename T, class Context>
 bool HuffmanTreeHierarchyOp<T, Context>::RunOnDevice() {
   const auto& Y = Input(0);
-  auto treeOutput = Output(0);
   CAFFE_ENFORCE_EQ(Y.dim(), 1, "Input labels must be a vector.");
   const auto y_data = Y.template data<T>();
-  treeOutput->Resize(1);
+  auto treeOutput = Output(0, {1}, at::dtype<string>());
   std::vector<int> labelCounts;
   labelCounts.resize(num_classes_, 0);
   for (int i = 0; i < Y.dim32(0); ++i) {
@@ -25,7 +25,6 @@ template <>
 bool HeatmapMaxKeypointOp<float, CPUContext>::RunOnDevice() {
   const auto& heatmaps_in = Input(0);
   const auto& bboxes_in = Input(1);
-  auto* keypoints_out = Output(0);
 
   CAFFE_ENFORCE_EQ(heatmaps_in.dim(), 4);
   const int N = heatmaps_in.dim32(0);
@@ -61,7 +60,7 @@ bool HeatmapMaxKeypointOp<float, CPUContext>::RunOnDevice() {
   } /* otherwise not initialized */
 
   // Resize and wrap outputs in Eigen
-  keypoints_out->Resize(N, 4, keypoint_count);
+  auto* keypoints_out = Output(0, {N, 4, keypoint_count}, at::dtype<float>());
   Eigen::Map<ERArrXXf> keypoints(
       keypoints_out->mutable_data<float>(), N, 4 * keypoint_count);
 
@@ -46,7 +46,7 @@ class Im2ColOp final : public Operator<Context> {
 
   bool RunOnDevice() override {
     auto& X = Input(0);
-    auto* Y = Output(0);
     CAFFE_ENFORCE(4 == X.dim());
 
     int N = 0, C = 0, H = 0, W = 0;
@@ -76,8 +76,10 @@ class Im2ColOp final : public Operator<Context> {
 
     switch (order_) {
       case StorageOrder::NCHW: {
-        Y->Resize(
-            std::vector<int64_t>{N, C * kernel_h_ * kernel_w_, out_h, out_w});
+        auto* Y = Output(
+            0,
+            std::vector<int64_t>{N, C * kernel_h_ * kernel_w_, out_h, out_w},
+            at::dtype<T>());
 
         const size_t dx = X.numel() / N;
         const size_t dy = Y->numel() / N;
@@ -104,8 +106,10 @@ class Im2ColOp final : public Operator<Context> {
         }
       }; break;
       case StorageOrder::NHWC: {
-        Y->Resize(
-            std::vector<int64_t>{N, out_h, out_w, kernel_h_ * kernel_w_ * C});
+        auto* Y = Output(
+            0,
+            std::vector<int64_t>{N, out_h, out_w, kernel_h_ * kernel_w_ * C},
+            at::dtype<T>());
 
         const size_t dx = X.numel() / N;
         const size_t dy = Y->numel() / N;
@@ -226,8 +226,8 @@ class IndexSizeOp : public Operator<CPUContext> {
 
   bool RunOnDevice() override {
     auto& base = OperatorBase::Input<std::unique_ptr<IndexBase>>(0);
-    auto* out = Output(0);
-    out->Resize(std::vector<int64_t>{});
+    auto* out = Output(0, std::vector<int64_t>{}, at::dtype<int64_tValue>());
    *out->template mutable_data<int64_tValue>() = base->Size();
     return true;
   }
@@ -16,13 +16,13 @@ using ConstEigenMatrixMapRowMajor = Eigen::Map<
 template <>
 bool IntegralImageOp<float, CPUContext>::RunOnDevice() {
   const auto& X = Input(0);
-  auto* Y = Output(0);
   CAFFE_ENFORCE_EQ(X.dim(), 4, "Only supports 4D tensors for the momement");
 
   vector<int64_t> out_shape(X.sizes().vec());
   out_shape[2] += 1; // H + 1 output size
   out_shape[3] += 1; // W + 1 output size
-  Y->Resize(out_shape);
+  auto* Y = Output(0, out_shape, at::dtype<float>());
   const int ind = X.dim32(0);
   const int chans = X.dim32(1);
   const int rows_in = X.dim32(2);
@@ -14,8 +14,8 @@ class IsEmptyOp : public Operator<Context> {
 
   bool RunOnDevice() override {
     auto& input = Input(0);
-    auto* output = Output(0);
-    output->Resize(std::vector<int64_t>{});
+    auto* output = Output(0, std::vector<int64_t>{}, at::dtype<bool>());
     *output->template mutable_data<bool>() = (input.numel() == 0);
     return true;
   }
@@ -35,8 +35,7 @@ class KeySplitOp : public Operator<Context> {
      counts[k]++;
     }
    for (int k = 0; k < categorical_limit_; k++) {
-      auto* eid = Output(k);
-      eid->Resize(counts[k]);
+      auto* eid = Output(k, {counts[k]}, at::dtype<int>());
      eids[k] = eid->template mutable_data<int>();
      counts[k] = 0;
    }
@@ -31,19 +31,12 @@ class LayerNormOp final : public Operator<Context> {
   bool DoRunWithType() {
     const auto& X = Input(0);
     auto* Y = Output(0);
-    auto* mean = Output(1);
-    auto* sig = Output(2);
     const int canonical_axis = X.canonical_axis_index(axis_);
     std::vector<int64_t> moments_dims(
         X.dims().cbegin(), X.dims().cbegin() + canonical_axis);
     moments_dims.push_back(1);
-    mean->Resize(moments_dims);
-    sig->Resize(moments_dims);
-    mean->template mutable_data<T>();
-    sig->template mutable_data<T>();
-    // TODO: change back
-    //auto* mean = Output(1, moments_dims, at::dtype<T>());
-    //auto* sig = Output(2, moments_dims, at::dtype<T>());
+    auto* mean = Output(1, moments_dims, at::dtype<T>());
+    auto* sig = Output(2, moments_dims, at::dtype<T>());
     runLayerNorm<T>(X, Y, mean, sig, canonical_axis, epsilon_, &scale_, &bias_, &context_);
     return true;
   }
@@ -49,8 +49,7 @@ class LengthsSplitOp final : public Operator<Context> {
        "`n_split` must contain a positive value for defined behavior.");
     const auto M = L.numel();
 
-    auto* Y = Output(0);
-    Y->Resize(M * n_split_);
+    auto* Y = Output(0, {M * n_split_}, at::dtype<int32_t>());
 
     const int32_t* Ldata = L.template data<int32_t>();
     int32_t* Ydata = Y->template mutable_data<int32_t>();
@@ -26,7 +26,6 @@ class LengthsPadOp : public Operator<Context> {
   bool DoRunWithType() {
     auto& data = Input(DATA);
     auto& lengths = Input(LENGTHS);
-    auto* output = Output(0);
 
     CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be 1-D");
     CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
@@ -48,7 +47,7 @@ class LengthsPadOp : public Operator<Context> {
 
     auto shape = data.sizes().vec();
     shape[0] = lengths_size * target_length_;
-    output->Resize(shape);
+    auto* output = Output(0, shape, at::dtype<T>());
 
     auto block_size = data.size_from_dim(1);
     auto src_data = data.template data<T>();
@@ -31,7 +31,6 @@ class SparseLengthsFused8BitRowwiseOp : public Operator<Context> {
     const auto& data = Input(DATA);
     const auto& indices = Input(INDICES);
     const auto& lengths = Input(LENGTHS);
-    auto* output = Output(0);
 
     CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES must be a vector");
     CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be a vector");
@@ -51,7 +50,7 @@ class SparseLengthsFused8BitRowwiseOp : public Operator<Context> {
     // Subtract 8 from the #columns of data for the 4 bytes for scale and 4
     // bytes for bias that we use in the fused representation (per row).
     const std::vector<int64_t> shape = {lengths.size(0), data.size(1) - 8};
-    output->Resize(shape);
+    auto* output = Output(0, shape, at::dtype<float>());
 
     Fused8BitRowwiseEmbeddingLookup(
         /*block_size=*/output->size(1),
@@ -52,10 +52,9 @@ class CPUSparseLengthsReductionOp : public Operator<CPUContext> {
     const int64_t M = lengthsInput.size(0);
     const int64_t indices_size = indicesInput.numel();
 
-    auto* output = Output(0);
     auto shape = dataInput.sizes().vec();
     shape[0] = M;
-    output->Resize(shape);
+    auto* output = Output(0, shape, at::dtype<T>());
     T* out_data = output->template mutable_data<T>();
 
     const InputType* in_data = dataInput.template data<InputType>();
@@ -36,7 +36,7 @@ class SparseLengths8BitsRowwiseOp : public Operator<Context> {
   bool DoRunWithType() {
     auto& dataInput = Input(DATA);
     auto& lengthsInput = Input(LENGTHS);
-    auto* output = Output(0);
     auto* scale_bias = Input(SCALE_BIAS).template data<float>();
     CAFFE_ENFORCE_EQ(1, lengthsInput.dim(), "LENGTHS must be a vector");
     const int64_t outputSize = lengthsInput.size(0);
@@ -58,7 +58,7 @@ class SparseLengths8BitsRowwiseOp : public Operator<Context> {
     const int* lengths = lengthsInput.template data<int>();
     vector<int64_t> shape = dataInput.sizes().vec();
     shape[0] = outputSize;
-    output->Resize(shape);
+    auto* output = Output(0, shape, at::dtype<OutDataT>());
     const float* w = nullptr;
     if (USE_WEIGHTS) {
      w = Input(WEIGHTS).template data<float>();
@@ -103,11 +103,11 @@ class FloatToRowwiseQuantized8BitsOp : public Operator<Context> {
   bool RunOnDevice() override {
     auto& input = Input(DATA_FLOAT);
     auto* output = Output(DATA_UINT8);
-    auto* scale_bias = Output(SCALE_BIAS);
     auto* input_data = input.template data<float>();
     output->ResizeLike(input);
     vector<int64_t> scale_bias_dims = {input.size(0), 2};
-    scale_bias->Resize(scale_bias_dims);
+    auto* scale_bias = Output(SCALE_BIAS, scale_bias_dims, at::dtype<float>());
     auto* output_data = output->template mutable_data<uint8_t>();
     float* scale_bias_data = scale_bias->template mutable_data<float>();
     size_t n_blocks = input.size(0);
@@ -9,14 +9,11 @@ bool LengthsTopKOp<T, Context>::RunOnDevice() {
   int N = Y.dim32(0);
   const T* X_data = X.template data<T>();
   const int* input_len = Y.template data<int>();
-  auto* output_topk_values = Output(TOPK_VALUES_OUT);
-  auto* output_topk_indices = Output(TOPK_INDICES_OUT);
 
-  output_topk_values->Resize(N * k_);
-  output_topk_indices->Resize(N * k_);
-  std::vector<int> output_dims = std::vector<int>({N, k_});
-  output_topk_values->Reshape(output_dims);
-  output_topk_indices->Reshape(output_dims);
+  auto output_dims = std::vector<int64_t>({N, k_});
+  auto* output_topk_values = Output(TOPK_VALUES_OUT, output_dims, at::dtype<T>());
+  auto* output_topk_indices =
+      Output(TOPK_INDICES_OUT, output_dims, at::dtype<int>());
   T* output_topk_values_data = output_topk_values->template mutable_data<T>();
   int* output_topk_indices_data =
      output_topk_indices->template mutable_data<int>();
@@ -77,7 +74,6 @@ bool LengthsTopKGradientOp<T, Context>::RunOnDevice() {
   auto& input_topk = Input(DER_TOPK_IN);
   CAFFE_ENFORCE_EQ(
      input_topk.numel(), N * k_, "input_topk shape is not correct");
-  auto* X_out = Output(DER_X_OUT);
 
   const int* input_len_data = input_len.template data<int>();
   const int* input_indices_data = input_indices.template data<int>();
@@ -87,9 +83,7 @@ bool LengthsTopKGradientOp<T, Context>::RunOnDevice() {
   for (int i = 0; i < N; i++) {
     num_indices += input_len_data[i];
   }
-  X_out->Resize(num_indices);
-  std::vector<int> output_dims = std::vector<int>({num_indices});
-  X_out->Reshape(output_dims);
+  auto* X_out = Output(DER_X_OUT, {num_indices}, at::dtype<T>());
   T* X_out_data = X_out->template mutable_data<T>();
   math::Set<T, Context>(num_indices, 0.0, X_out_data, &context_);
 
@@ -176,14 +176,14 @@ bool LambdaRankNdcgOp<float, CPUContext>::RunOnDevice() {
   auto& y = Input(PRED);
   auto& r = Input(REL);
   auto& sid = Input(SESSION_LENS);
-  auto* loss = Output(LOSS);
   auto* dy = Output(DPRED);
 
   const auto* session_lengths = sid.template data<int>();
   CAFFE_ENFORCE(y.dim() == 1);
   CAFFE_ENFORCE(y.numel() == r.numel());
   dy->Resize(y.numel());
-  loss->Resize(sid.numel());
+  auto* loss = Output(LOSS, {sid.numel()}, at::dtype<float>());
   auto loss_vec = loss->template mutable_data<float>();
   int start_id = 0;
   for (int i = 0; i < sid.numel(); i++) {
@@ -201,7 +201,7 @@ bool LambdaRankNdcgGradientOp<float, CPUContext>::RunOnDevice() {
   auto& sids = Input(SESSION_LENS);
   auto& dy_cache = Input(DY_CACHE);
   auto& dLoss = Input(DLOSS);
-  auto* dy = Output(DY);
   CAFFE_ENFORCE(y.dim() == 1);
   CAFFE_ENFORCE(dy_cache.dim() == 1);
   CAFFE_ENFORCE(dy_cache.numel() > 0);
@@ -212,7 +212,7 @@ bool LambdaRankNdcgGradientOp<float, CPUContext>::RunOnDevice() {
 
   ConstEigenVectorArrayMap<float> dy_cache_vec(
      dy_cache.template data<float>(), dy_cache.numel());
-  dy->Resize(dy_cache.numel());
+  auto* dy = Output(DY, {dy_cache.numel()}, at::dtype<float>());
   EigenVectorArrayMap<float> dy_vec(
      dy->template mutable_data<float>(), dy->numel());
   auto multiplier = dLoss.template data<float>();