#include "caffe2/operators/sequence_ops.h" #include "caffe2/core/operator.h" #include "caffe2/core/tensor.h" namespace caffe2 { vector TensorInferenceForAddPadding( const OperatorDef& def, const vector& in) { ArgumentHelper helper(def); const int padding_width = helper.GetSingleArgument("padding_width", 1); const int end_padding_width = helper.GetSingleArgument("end_padding_width", padding_width); CAFFE_ENFORCE_GT(in.size(), 0); CAFFE_ENFORCE_GE(in[0].dims_size(), 1); if (in.size() > 1) { CAFFE_ENFORCE_EQ(in[1].dims_size(), 1); } const auto num_paddings = (in.size() == 1 ? 1 : in[1].dims(0)); vector out_shape(in[0].dims().begin(), in[0].dims().end()); out_shape[0] += (padding_width + end_padding_width) * num_paddings; if (def.output_size() == 1) { return vector{CreateTensorShape(out_shape, in[0].data_type())}; } else { return vector{ CreateTensorShape(out_shape, in[0].data_type()), CreateTensorShape(vector(1, num_paddings), TensorProto::INT32)}; } } template <> template void GatherPaddingOp::GatherPadding( const int outer_size, const int lengths_size, const int block_size, const int pad_width, const T* in_ptr, const int* lengths_ptr, T* padding_start_ptr, T* padding_end_ptr) { CAFFE_ENFORCE( (!std::is_same::value), "GatherPadding should not be executed on an input of type bool, as " "addition is not properly defined with booleans."); int64_t total_length = 0; for (int i = 0; i < lengths_size; ++i) { // check total length consistency const auto length = lengths_ptr[i]; total_length += length; CAFFE_ENFORCE_LE(total_length, outer_size); // accumulate start paddings for (int j = 0; j < startPaddingWidth_; ++j) { for (int k = 0; k < block_size; ++k) { // Note: MSVC warns about unsafe use of type bool in operation. // This is now guarded by a CAFFE_ENFORCE so we can suppress it. 
template <>
template <typename T>
void GatherPaddingOp<CPUContext>::GatherPadding(
    const int outer_size,
    const int lengths_size,
    const int block_size,
    const int pad_width,
    const T* in_ptr,
    const int* lengths_ptr,
    T* padding_start_ptr,
    T* padding_end_ptr) {
  CAFFE_ENFORCE(
      (!std::is_same<bool, T>::value),
      "GatherPadding should not be executed on an input of type bool, as "
      "addition is not properly defined with booleans.");
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check total length consistency
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // accumulate start paddings
    for (int j = 0; j < startPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
        // Note: MSVC warns about unsafe use of type bool in operation.
        // This is now guarded by a CAFFE_ENFORCE so we can suppress it.
#pragma warning(suppress: 4804)
        padding_start_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
    in_ptr += block_size * (length - pad_width);
    // accumulate end paddings
    for (int j = 0; j < endPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
#pragma warning(suppress: 4804)
        padding_end_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
  }
}

template <>
template <typename T>
bool RemovePaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.dim(), 1);
  const int32_t outer_size = in.sizes()[0];
  const auto block_size = std::accumulate(
      // NOLINTNEXTLINE(modernize-use-transparent-functors)
      in.sizes().begin() + 1,
      in.sizes().end(),
      1,
      std::multiplies<int64_t>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;

  // if no lengths is provided, assume it is a single full-span entry
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.numel();
  }

  auto out_dims = in.sizes().vec();
  out_dims[0] -= pad_width * lengths_size;
  auto* out = Output(0, std::move(out_dims), at::dtype<T>());

  const auto* in_ptr = in.template data<T>();
  auto* out_ptr = out->template mutable_data<T>();

  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    std::copy(
        in_ptr + block_size * startPaddingWidth_,
        in_ptr + block_size * (length - endPaddingWidth_),
        out_ptr);
    in_ptr += block_size * length;
    out_ptr += block_size * (length - pad_width);
  }
  if (OutputSize() == 1) {
    return true;
  }
  auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->template mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x - pad_width; });
  return true;
}

template <>
template <typename T>
bool AddPaddingOp<CPUContext>::MakePadding(
    const T* in_ptr,
    T* out_ptr,
    const int32_t* lengths_ptr,
    int32_t lengths_size,
    int32_t outer_size,
    const T* padding_start_ptr,
    const T* padding_end_ptr,
    int64_t block_size) {
  if (!lengths_ptr) {
    lengths_ptr = &outer_size;
  }

  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // copy padding before
    if (!padding_start_ptr) {
      memset(out_ptr, 0, block_size * startPaddingWidth_ * sizeof(T));
      out_ptr += block_size * startPaddingWidth_;
    } else {
      for (int j = 0; j < startPaddingWidth_; ++j) {
        std::copy(padding_start_ptr, padding_start_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
    // copy payload
    const auto num_elems = block_size * length;
    std::copy(in_ptr, in_ptr + num_elems, out_ptr);
    in_ptr += num_elems;
    out_ptr += num_elems;
    // copy padding after
    if (!padding_end_ptr) {
      memset(out_ptr, 0, block_size * endPaddingWidth_ * sizeof(T));
      out_ptr += block_size * endPaddingWidth_;
    } else {
      for (int j = 0; j < endPaddingWidth_; ++j) {
        std::copy(padding_end_ptr, padding_end_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
  }
  if (OutputSize() == 1) {
    return true;
  }

  auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->template mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x + pad_width; });
  return true;
}
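// Per-range output layout produced by MakePadding above, where each "row"
// is block_size contiguous elements:
//
//   [startPaddingWidth_ rows of start padding (or zeros)]
//   [length rows copied from the input]
//   [endPaddingWidth_ rows of end padding (or zeros)]
//
// GatherPadding walks the same layout to accumulate, into the padding
// tensors, the contributions that land on the padding rows.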
template <>
bool PadEmptySamplesOp<CPUContext>::RunOnDevice() {
  auto& lengths = Input(0);
  auto* lengthsPtr = lengths.template data<int32_t>();
  CAFFE_ENFORCE(lengths.dim() == 1, "LENGTH should be 1-D");
  CAFFE_ENFORCE(InputSize() >= 1, "Input size must be no less than 1");

  int needPadding = 0;
  int sumLen = 0;
  for (int i = 0; i < lengths.numel(); ++i) {
    if (lengthsPtr[i] == 0) {
      needPadding++;
    }
    sumLen += lengthsPtr[i];
  }

  auto* out_lengths = Output(0, {lengths.numel()}, at::dtype<int32_t>());
  auto* outLengthsPtr = out_lengths->template mutable_data<int32_t>();
  for (int i = 0; i < lengths.numel(); ++i) {
    if (lengthsPtr[i] == 0) {
      outLengthsPtr[i] = 1;
    } else {
      outLengthsPtr[i] = lengthsPtr[i];
    }
  }

  for (int k = 0; k < InputSize() - 1; k++) {
    auto& features = Input(1 + k);
    CAFFE_ENFORCE(features.dim() >= 1, "FEATURE should be at least 1-D");
    CAFFE_ENFORCE(
        features.size(0) == sumLen, "FEATURE and LENGTH should be consistent");
    const auto block_size = features.size_from_dim(1);

    auto* out_features = Output(1 + k);
    auto outDim = features.sizes().vec();
    outDim.at(0) += needPadding;
    out_features->Resize(outDim);
    auto dst =
        static_cast<char*>(out_features->raw_mutable_data(features.dtype()));
    auto src_base = static_cast<const char*>(features.raw_data());
    // copy data and add padding index as zero
    Tensor zero{CPU};
    zero.Resize(block_size);
    auto zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.dtype()));
    // TODO Handle other composite types, such as vector<...>
    if (!features.dtype().Match<std::string>()) {
      memset(zeroPtr, 0, zero.nbytes());
    }
    int start_dest = 0;
    int start_src = 0;
    for (int i = 0; i < lengths.numel(); ++i) {
      if (lengthsPtr[i] == 0) {
        context_.CopyItemsSameDevice(
            features.dtype(),
            block_size,
            zeroPtr,
            dst + start_dest * features.dtype().itemsize());
        start_dest += block_size;
      } else {
        auto src = src_base + start_src * features.dtype().itemsize();
        context_.CopyItemsSameDevice(
            features.dtype(),
            lengthsPtr[i] * block_size,
            src,
            dst + start_dest * features.dtype().itemsize());
        // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
        start_src += lengthsPtr[i] * block_size;
        // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
        start_dest += lengthsPtr[i] * block_size;
      }
    }
  }
  return true;
}

REGISTER_CPU_OPERATOR(AddPadding, AddPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(RemovePadding, RemovePaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(GatherPadding, GatherPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(PadEmptySamples, PadEmptySamplesOp<CPUContext>);

struct GetAddPaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // whether to provide lengths as input to gradient
    vector<string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    vector<OperatorDef> ops;
    // gradient on the data
    ops.push_back(CreateOperatorDef(
        "RemovePadding", "", g_inputs, vector<string>{GI(0)}));
    // gradient on the start_padding (and end_padding)
    if (Def().input_size() >= 3) {
      std::vector<string> padding_grads{GI(2)};
      if (Def().input_size() == 4) {
        padding_grads.push_back(GI(3));
      }
      // NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
      auto g_inputs2 = g_inputs;
      ops.push_back(
          CreateOperatorDef("GatherPadding", "", g_inputs2, padding_grads));
    }
    return ops;
  }
};
REGISTER_GRADIENT(AddPadding, GetAddPaddingGradient);
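// Gradient wiring sketch for AddPadding: the gradient w.r.t. data_in is
// simply RemovePadding applied to the output gradient (padding rows carry
// no data gradient), while the gradients w.r.t. the optional
// start_padding/end_padding inputs are the sums of the output-gradient
// rows that land on the padding, which is exactly what GatherPadding
// computes.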
struct GetRemovePaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // whether to provide lengths as input to gradient
    vector<string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    return SingleGradientDef(
        "AddPadding", "", g_inputs, vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RemovePadding, GetRemovePaddingGradient);

OPERATOR_SCHEMA(AddPadding)
    .NumInputs(1, 4)
    .NumOutputs(1, 2)
    .TensorInferenceFunction(
        OpSchema::NeedsAllInputShapes(TensorInferenceForAddPadding))
    .SetDoc(R"DOC(
Given a partitioned tensor $T<N, D_1, ..., D_n>$, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension $N$, return a
tensor $T<(N + 2 * padding\_width), D_1, ..., D_n>$ with paddings added to the
start and end of each range.

Optionally, different paddings can be provided for beginning and end.
Paddings provided must be a tensor $T<D_1, ..., D_n>$. If no padding is
provided, add zero padding. If no lengths vector is provided, add padding
only once, at the start and end of data.

Github Links:

- https://github.com/pytorch/pytorch/blob/main/caffe2/operators/sequence_ops.cc
<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths"],
    ["Y", "lengths_out"],
    padding_width=1
)

workspace.FeedBlob("X", (np.random.rand(3,2,2).astype(np.float32)))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))

print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out:", workspace.FetchBlob("lengths_out"))
```

**Result**

```
X: [[[0.2531572  0.4588472 ]
  [0.45140603 0.61161053]]

 [[0.92500854 0.8045306 ]
  [0.03356671 0.30233648]]

 [[0.4660227  0.6287745 ]
  [0.79372746 0.08609265]]]
Y: [[[0.         0.        ]
  [0.         0.        ]]

 [[0.2531572  0.4588472 ]
  [0.45140603 0.61161053]]

 [[0.92500854 0.8045306 ]
  [0.03356671 0.30233648]]

 [[0.4660227  0.6287745 ]
  [0.79372746 0.08609265]]

 [[0.         0.        ]
  [0.         0.        ]]]
lengths_out: [5]
```

</details>
)DOC") .Arg( "padding_width", "*(type: int)* Number of copies of padding to add around each range.") .Arg( "end_padding_width", "*(type: int)* [OPTIONAL] Specifies a different end-padding width. If " "this is not set, will use same as `padding_width`.") .Input( 0, "data_in", "*(type: Tensor)* Input data ($T$).") .Input( 1, "lengths", "*(type: Tensor``)* Number of elements in each range. " "sum(lengths) = N.") .Input( 2, "start_padding", "*(type: Tensor``)* [OPTIONAL] Padding data for range start " "($T$).") .Input( 3, "end_padding", "*(type: Tensor``)* [OPTIONAL] Padding for range end. If not " "provided, `start_padding` is used ($T$).") .Output( 0, "data_out", "*(type: Tensor)* Padded data tensor ($T$).") .Output( 1, "lengths_out", "*(type: Tensor``)* [OPTIONAL] Lengths for each padded range."); OPERATOR_SCHEMA(RemovePadding) .NumInputs(1, 2) .NumOutputs(1, 2) .SetDoc(R"DOC( Remove padding around the edges of each segment of the input data. This is the reverse operation of **AddPadding**, and uses the same arguments and conventions for input and output data format. Github Links: - https://github.com/pytorch/pytorch/blob/main/caffe2/operators/sequence_ops.cc
<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

addpad_op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths_add"],
    ["Y", "lengths_out_add"],
    padding_width=1
)

rmpad_op = core.CreateOperator(
    "RemovePadding",
    ["Y", "lengths_rm"],
    ["Z", "lengths_out_rm"],
    padding_width=1
)

workspace.FeedBlob("X", (np.random.randint(20, size=(3,5))))
workspace.FeedBlob("lengths_add", np.array([3]).astype(np.int32))
workspace.FeedBlob("lengths_rm", np.array([5]).astype(np.int32))

print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(addpad_op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out_add:", workspace.FetchBlob("lengths_out_add"))

workspace.RunOperatorOnce(rmpad_op)
print("Z:", workspace.FetchBlob("Z"))
print("lengths_out_rm:", workspace.FetchBlob("lengths_out_rm"))
```

**Result**

```
X: [[17 19  1  9  1]
 [19  3  5 19  1]
 [16  0  0  0  4]]
Y: [[ 0  0  0  0  0]
 [17 19  1  9  1]
 [19  3  5 19  1]
 [16  0  0  0  4]
 [ 0  0  0  0  0]]
lengths_out_add: [5]
Z: [[17 19  1  9  1]
 [19  3  5 19  1]
 [16  0  0  0  4]]
lengths_out_rm: [3]
```

</details>
)DOC") .Arg( "padding_width", "*(type: int)* Outer-size of padding to remove around each range.") .Arg( "end_padding_width", "*(type: int)* [OPTIONAL] Specifies a different end-padding width. " "If this is not set, will use same as `padding_width`.") .Input( 0, "data_in", "Input tensor ($T$).") .Input( 1, "lengths", "*(type: Tensor``)* Number of elements in each range. " "sum(lengths) = N. If not provided, considers all data as a single " "segment.") .Output( 0, "data_out", "*(type: Tensor)* Padded data tensor " "($T$).") .Output( 1, "lengths_out", "*(type: Tensor``)* [OPTIONAL] Lengths for each padded range."); OPERATOR_SCHEMA(GatherPadding) .NumInputs(2) .NumOutputs(1, 2) .SetDoc(R"DOC( Gather the sum of start and end paddings in a padded input sequence. Used in order to compute the gradients of AddPadding w.r.t the padding tensors. )DOC") .Arg("padding_width", "Outer-size of padding present around each range.") .Arg( "end_padding_width", "(Optional) Specifies a different end-padding width.") .Input(0, "data_in", "T Padded input data") .Input( 1, "lengths", "(i64) Num of elements in each range. sum(lengths) = N. " "If not provided, considers all data as a single segment.") .Output( 0, "padding_sum", "Sum of all start paddings, or of all " "paddings if end_padding_sum is not provided.") .Output( 1, "end_padding_sum", "T Sum of all end paddings, if provided."); OPERATOR_SCHEMA(PadEmptySamples) .NumInputs(1, INT_MAX) .NumOutputs(1, INT_MAX) .SetDoc(R"DOC( Pad empty field given lengths and index features, Input(0) is a blob pointing to the lengths of samples in one batch, [Input(1),... Input(num_fields)] a list of tensors containing the data for each field of the features. PadEmptySamples is thread safe. )DOC") .Input(0, "lengths", "A blob containing a pointer to the lengths.") .Output( 0, "out_lengths", "Tensor containing lengths with empty sample padded."); } // namespace caffe2