From add489fd8d19457d44a2a4cb716ac67976d1d687 Mon Sep 17 00:00:00 2001
From: Aliia Khasanova
Date: Fri, 31 Oct 2025 04:58:35 -0700
Subject: [PATCH] Use `std::vector<BufferAllocation>` instead of
 `std::vector<std::unique_ptr<BufferAllocation>>` in DynamicSliceThunk.

`BufferAllocation::Slice` stores a raw pointer to the corresponding
`BufferAllocation`. Currently we keep the embedded thunk's allocations alive
by storing unique_ptrs in the wrapping DynamicSliceThunk. This design makes it
hard to reuse the existing infrastructure, specifically to serialize
`DynamicSliceThunk`. To address this, I'm changing fake_allocations to be a
`std::vector<BufferAllocation>`.

The move constructor `std::vector::vector(std::vector&&)` is guaranteed to run
in constant time, and therefore it steals the internal data buffer from the
source vector instead of moving the individual elements. This implies that
pointers to the allocations remain stable as long as:
* we preallocate the vector up front, and
* we never copy the vector, only move it.

To make this safer for later use, we can explicitly prohibit BufferAllocation
from being copyable/movable. I'm going to do this in a follow-up CL.

PiperOrigin-RevId: 826440060
---
 .../xla/xla/backends/gpu/codegen/custom.cc    |  60 +++--
 .../gpu/runtime/command_buffer_cmd.cc         |   2 +-
 .../backends/gpu/runtime/command_buffer_cmd.h |   4 +-
 .../gpu/runtime/command_buffer_cmd_emitter.cc |   4 +-
 .../gpu/runtime/command_buffer_thunk_test.cc  |  28 ++-
 .../gpu/runtime/dynamic_slice_thunk.cc        |   6 +-
 .../gpu/runtime/dynamic_slice_thunk.h         |   7 +-
 .../gpu/runtime/dynamic_slice_thunk_test.cc   | 207 ++++++++----------
 8 files changed, 142 insertions(+), 176 deletions(-)
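To illustrate the pointer-stability argument above, here is a minimal,
self-contained sketch (not part of this patch; `Allocation` is a hypothetical
stand-in for `BufferAllocation`) showing that moving a preallocated
`std::vector` keeps element addresses stable, so a raw pointer like the one
held by `BufferAllocation::Slice` stays valid across the move:

  #include <cassert>
  #include <cstdint>
  #include <utility>
  #include <vector>

  // Hypothetical stand-in for BufferAllocation (index, size, color).
  struct Allocation {
    int64_t index;
    int64_t size;
    int64_t color;
  };

  int main() {
    // Preallocate so push_back never reallocates the backing buffer.
    std::vector<Allocation> allocations;
    allocations.reserve(4);
    for (int64_t i = 0; i < 4; ++i) {
      allocations.push_back({i, /*size=*/1024, /*color=*/0});
    }

    // A raw pointer into the vector, analogous to the pointer stored by
    // BufferAllocation::Slice.
    const Allocation* raw = &allocations[2];

    // Moving the vector steals its heap buffer in O(1); the elements are not
    // relocated, so `raw` remains valid and now points into `moved`.
    std::vector<Allocation> moved = std::move(allocations);
    assert(raw == &moved[2]);

    // Copying instead would allocate new storage at different addresses,
    // which is why the wrapping thunk must move (never copy) the vector.
    return 0;
  }

The same reasoning is why the fake_allocations vectors below are either
constructed at their final size or reserve()d before any emplace_back.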
diff --git a/third_party/xla/xla/backends/gpu/codegen/custom.cc b/third_party/xla/xla/backends/gpu/codegen/custom.cc
index ac6ab6a6a65..3edb7e69f32 100644
--- a/third_party/xla/xla/backends/gpu/codegen/custom.cc
+++ b/third_party/xla/xla/backends/gpu/codegen/custom.cc
@@ -613,7 +613,7 @@ absl::StatusOr EmitGemm(
   // different offset by creating new fake allocations so each operand will
   // have a different buffer index. The slices can thus always start at offset
   // 0. DynamicSliceThunk will take care of the offset adjustment.
- std::vector> fake_allocations(4); + std::vector fake_allocations(4, {0, 0, 0}); if (fusion.shape().IsArray()) { TF_ASSIGN_OR_RETURN( output, GetResultSlice(buffer_assignment, adaptor, fusion, custom_call, @@ -645,10 +645,10 @@ absl::StatusOr EmitGemm( offset_buffer_indices, orig_shapes, sliced_shapes, offset_byte_sizes, extracted_offset_modules, arg_idx, can_compute_indvar_on_host, while_op, indvar_idx, inlined_module)); - fake_allocations[arg_idx] = std::make_unique( + fake_allocations[arg_idx] = BufferAllocation( /*index=*/arg_idx, workspace->size(), /*color=*/0); - slice_workspace_fake = BufferAllocation::Slice( - fake_allocations[arg_idx].get(), 0, workspace->size()); + slice_workspace_fake = BufferAllocation::Slice(&fake_allocations[arg_idx], + 0, workspace->size()); } if (absl::c_all_of(slice_instrs, [&](auto slice_instr) { @@ -676,27 +676,27 @@ absl::StatusOr EmitGemm( unsigned fake_arg_idx = 0; int64_t lhs_byte_size = ShapeUtil::ByteSizeOf(custom_call.operand(fake_arg_idx)->shape()); - fake_allocations[fake_arg_idx] = std::make_unique( + fake_allocations[fake_arg_idx] = BufferAllocation( /*index=*/fake_arg_idx, lhs_byte_size, /*color=*/0); - BufferAllocation::Slice slice_lhs_fake(fake_allocations[fake_arg_idx].get(), - 0, lhs_byte_size); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations[fake_arg_idx], 0, + lhs_byte_size); fake_arg_idx++; int64_t rhs_byte_size = ShapeUtil::ByteSizeOf(custom_call.operand(fake_arg_idx)->shape()); - fake_allocations[fake_arg_idx] = std::make_unique( + fake_allocations[fake_arg_idx] = BufferAllocation( /*index=*/fake_arg_idx, rhs_byte_size, /*color=*/0); - BufferAllocation::Slice slice_rhs_fake(fake_allocations[fake_arg_idx].get(), - 0, rhs_byte_size); + BufferAllocation::Slice slice_rhs_fake(&fake_allocations[fake_arg_idx], 0, + rhs_byte_size); fake_arg_idx++; int64_t out_fake_byte_size = ShapeUtil::ByteSizeOf( custom_call.shape().IsArray() ? custom_call.shape() : custom_call.shape().tuple_shapes(0)); - fake_allocations[fake_arg_idx] = std::make_unique( + fake_allocations[fake_arg_idx] = BufferAllocation( /*index=*/fake_arg_idx, out_fake_byte_size, /*color=*/0); - BufferAllocation::Slice slice_out_fake(fake_allocations[fake_arg_idx].get(), - 0, out_fake_byte_size); + BufferAllocation::Slice slice_out_fake(&fake_allocations[fake_arg_idx], 0, + out_fake_byte_size); ThunkSequence seq; seq.emplace_back(std::make_unique( thunk_info, std::move(config), slice_lhs_fake, slice_rhs_fake, @@ -962,7 +962,7 @@ absl::StatusOr EmitCustomCall( ir_emitter_context.platform_name()); }; - std::vector> fake_allocations(num_args); + std::vector fake_allocations(num_args, {0, 0, 0}); if (absl::c_any_of(slice_instrs, IsDynamicSliceOrDynamicUpdateSlice)) { // Creating embedded custom call thunk.
unsigned fake_arg_idx = 0; @@ -982,10 +982,10 @@ absl::StatusOr EmitCustomCall( } int64_t operand_byte_size = ShapeUtil::ByteSizeOf(subshape); - fake_allocations[fake_arg_idx] = std::make_unique( + fake_allocations[fake_arg_idx] = BufferAllocation( /*index=*/fake_arg_idx, operand_byte_size, /*color=*/0); - BufferAllocation::Slice fake_slice( - fake_allocations[fake_arg_idx].get(), 0, operand_byte_size); + BufferAllocation::Slice fake_slice(&fake_allocations[fake_arg_idx], + 0, operand_byte_size); fake_arg_idx++; fake_operands.push_back(ShapedSlice{fake_slice, subshape}); @@ -1007,10 +1007,10 @@ absl::StatusOr EmitCustomCall( } int64_t result_byte_size = ShapeUtil::ByteSizeOf(subshape); - fake_allocations[fake_arg_idx] = std::make_unique( + fake_allocations[fake_arg_idx] = BufferAllocation( /*index=*/fake_arg_idx, result_byte_size, /*color=*/0); - BufferAllocation::Slice fake_slice( - fake_allocations[fake_arg_idx].get(), 0, result_byte_size); + BufferAllocation::Slice fake_slice(&fake_allocations[fake_arg_idx], 0, + result_byte_size); fake_arg_idx++; fake_results.push_back(ShapedSlice{fake_slice, subshape}); @@ -1065,7 +1065,7 @@ using Slices = std::vector; // fake_arguments: the fake slices of the inputs/outputs of the hero // instruction, when the slicing is dynamic. struct SliceDataForCollectives { - std::vector> fake_allocations; + std::vector fake_allocations; std::vector slice_instrs; Slices arguments, fake_arguments; std::vector>> @@ -1076,7 +1076,7 @@ struct SliceDataForCollectives { std::unique_ptr init_module, update_module; bool isDynamic, can_compute_indvar_on_host; explicit SliceDataForCollectives(int num_args) - : fake_allocations(num_args), + : fake_allocations(num_args, {0, 0, 0}), slice_instrs(num_args), arguments(num_args, std::nullopt), fake_arguments(num_args, std::nullopt), @@ -1197,11 +1197,10 @@ CollectSliceArgumentMetadataForCollectives( unsigned fake_arg_idx = 0; for (HloInstruction* operand : instr->operands()) { int64_t operand_byte_size = ShapeUtil::ByteSizeOf(operand->shape()); - slice_data.fake_allocations[fake_arg_idx] = - std::make_unique( - /*index=*/fake_arg_idx, operand_byte_size, /*color=*/0); + slice_data.fake_allocations[fake_arg_idx] = BufferAllocation( + /*index=*/fake_arg_idx, operand_byte_size, /*color=*/0); BufferAllocation::Slice fake_slice( - /*allocation=*/slice_data.fake_allocations[fake_arg_idx].get(), + /*allocation=*/&slice_data.fake_allocations[fake_arg_idx], /*offset=*/0, /*size=*/operand_byte_size); slice_data.fake_arguments[fake_arg_idx] = fake_slice; @@ -1217,12 +1216,11 @@ CollectSliceArgumentMetadataForCollectives( } for (const HloInstruction* user : collective_results) { int64_t out_fake_byte_size = ShapeUtil::ByteSizeOf(user->shape()); - slice_data.fake_allocations[fake_arg_idx] = - std::make_unique( - /*index=*/fake_arg_idx, /*size=*/out_fake_byte_size, - /*color=*/0); + slice_data.fake_allocations[fake_arg_idx] = BufferAllocation( + /*index=*/fake_arg_idx, /*size=*/out_fake_byte_size, + /*color=*/0); BufferAllocation::Slice fake_slice( - /*allocation=*/slice_data.fake_allocations[fake_arg_idx].get(), + /*allocation=*/&slice_data.fake_allocations[fake_arg_idx], /*offset=*/0, /*size=*/out_fake_byte_size); slice_data.fake_arguments[fake_arg_idx] = fake_slice; fake_arg_idx++; diff --git a/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.cc b/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.cc index 38575ae6b75..3128e938c9e 100644 --- a/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.cc +++ 
b/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.cc @@ -2377,7 +2377,7 @@ CommandBufferCmd::BufferUseVector CollectiveBroadcastCmd::buffers() const { DynamicSliceFusionCmd::DynamicSliceFusionCmd( CommandBufferCmdExecutor embedded_commands, std::vector> arguments, - std::vector> fake_allocations, + std::vector fake_allocations, std::vector>> offsets, std::vector> orig_shapes, std::vector> sliced_shapes, diff --git a/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.h b/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.h index b0c7543511f..b624f207c38 100644 --- a/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.h +++ b/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd.h @@ -1217,7 +1217,7 @@ class DynamicSliceFusionCmd : public CommandBufferCmd { DynamicSliceFusionCmd( CommandBufferCmdExecutor embedded_commands, std::vector> arguments, - std::vector> fake_allocations_, + std::vector fake_allocations, std::vector>> offsets, std::vector> orig_shapes, @@ -1252,7 +1252,7 @@ class DynamicSliceFusionCmd : public CommandBufferCmd { private: CommandBufferCmdExecutor embedded_commands_; std::vector slices_; - std::vector> fake_allocations_; + std::vector fake_allocations_; // Pinned host memory for transferring offset values from device to host. absl::Mutex mutex_; diff --git a/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd_emitter.cc b/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd_emitter.cc index 9cb965bbd79..dc33b07659f 100644 --- a/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd_emitter.cc +++ b/third_party/xla/xla/backends/gpu/runtime/command_buffer_cmd_emitter.cc @@ -197,10 +197,10 @@ static absl::StatusOr Convert( ConvertToCommands(thunk.get_embedded_thunk()->thunks(), options)); auto& thunk_fake_allocations = thunk.get_fake_allocations(); - std::vector> fake_allocations; + std::vector fake_allocations; for (auto it = thunk_fake_allocations.begin(); it != thunk_fake_allocations.end(); ++it) { - fake_allocations.push_back(std::make_unique(**it)); + fake_allocations.push_back(BufferAllocation(*it)); } return std::make_unique( std::move(embedded_cmds), thunk.get_arguments(), diff --git a/third_party/xla/xla/backends/gpu/runtime/command_buffer_thunk_test.cc b/third_party/xla/xla/backends/gpu/runtime/command_buffer_thunk_test.cc index 3d2bc93daaf..245c5b65689 100644 --- a/third_party/xla/xla/backends/gpu/runtime/command_buffer_thunk_test.cc +++ b/third_party/xla/xla/backends/gpu/runtime/command_buffer_thunk_test.cc @@ -949,24 +949,22 @@ TEST(CommandBufferThunkTest, DISABLED_DynamicSliceFusionCmd) { TF_ASSERT_OK(stream->MemZero(&workspace, 1024 * 1024)); // Prepare buffer allocations for recording command buffer. 
- std::vector> fake_allocations(4); - fake_allocations[0] = std::make_unique( + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back( /*index=*/0, fake_lhs_length, /*color=*/0); - fake_allocations[1] = - std::make_unique(/*index=*/1, rhs_length, /*color=*/0); - fake_allocations[2] = - std::make_unique(/*index=*/2, out_length, - /*color=*/0); + fake_allocations.emplace_back( + /*index=*/1, rhs_length, /*color=*/0); + fake_allocations.emplace_back(/*index=*/2, out_length, + /*color=*/0); - fake_allocations[3] = - std::make_unique(/*index=*/3, 1024 * 1024, - /*color=*/0); - BufferAllocation::Slice fake_slice_lhs(fake_allocations[0].get(), 0, + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, + /*color=*/0); + BufferAllocation::Slice fake_slice_lhs(&fake_allocations[0], 0, fake_lhs_length); - BufferAllocation::Slice slice_rhs(fake_allocations[1].get(), 0, rhs_length); - BufferAllocation::Slice slice_out(fake_allocations[2].get(), 0, out_length); - BufferAllocation::Slice slice_workspace(fake_allocations[3].get(), 0, - 1024 * 1024); + BufferAllocation::Slice slice_rhs(&fake_allocations[1], 0, rhs_length); + BufferAllocation::Slice slice_out(&fake_allocations[2], 0, out_length); + BufferAllocation::Slice slice_workspace(&fake_allocations[3], 0, 1024 * 1024); auto config = GemmConfig::For( ShapeUtil::MakeShape(PrimitiveType::F32, {2, 4}), {}, {1}, ShapeUtil::MakeShape(PrimitiveType::F32, {4, 3}), {}, {0}, diff --git a/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.cc b/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.cc index 747e0ecd151..98763d288d4 100644 --- a/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.cc +++ b/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.cc @@ -175,7 +175,7 @@ std::string DynamicSliceThunk::SliceDef::ToString() const { DynamicSliceThunk::DynamicSliceThunk( ThunkInfo thunk_info, std::unique_ptr embedded_thunk, std::vector> arguments, - std::vector> fake_allocations, + std::vector fake_allocations, std::vector>> offsets, std::vector> orig_shapes, std::vector> sliced_shapes, @@ -687,8 +687,8 @@ absl::StatusOr> DynamicSliceThunk::FromProto( return std::make_unique( thunk_info, std::make_unique(std::move(embedded_thunks)), std::move(arguments), - /*fake_allocations=*/std::vector>(), - std::move(offsets), std::move(orig_shapes), std::move(sliced_shapes), + /*fake_allocations=*/std::vector(), std::move(offsets), + std::move(orig_shapes), std::move(sliced_shapes), std::move(offset_byte_sizes), std::move(offset_as_function_of_indvar_metadata)); } diff --git a/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.h b/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.h index f6574b2927d..c0a08b33539 100644 --- a/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.h +++ b/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk.h @@ -114,7 +114,7 @@ class DynamicSliceThunk : public Thunk { DynamicSliceThunk( ThunkInfo thunk_info, std::unique_ptr embedded_thunk, std::vector> arguments, - std::vector> fake_allocations, + std::vector fake_allocations, std::vector>> offsets, std::vector> orig_shapes, std::vector> sliced_shapes, @@ -150,8 +150,7 @@ class DynamicSliceThunk : public Thunk { return arguments_; } - const std::vector>& get_fake_allocations() - const { + const std::vector& get_fake_allocations() const { return fake_allocations_; } @@ -204,7 +203,7 @@ class DynamicSliceThunk : public Thunk { private: std::unique_ptr embedded_thunk_; std::vector> 
arguments_; - std::vector> fake_allocations_; + std::vector fake_allocations_; std::vector>> offsets_; std::vector> orig_shapes_; std::vector> sliced_shapes_; diff --git a/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk_test.cc b/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk_test.cc index 5e99413914b..6c09bb8a228 100644 --- a/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk_test.cc +++ b/third_party/xla/xla/backends/gpu/runtime/dynamic_slice_thunk_test.cc @@ -161,28 +161,24 @@ absl::StatusOr> CreateSlicedGemmThunk( int64_t out_length = sizeof(float) * 1 * 1; int64_t offset_length = sizeof(int64_t); // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations; - fake_allocations.push_back( - std::make_unique(/*index=*/0, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_lhs_fake(fake_allocations.back().get(), 0, + std::vector fake_allocations; + fake_allocations.reserve(4); + + fake_allocations.emplace_back(/*index=*/0, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations.back(), 0, rhs_length); auto alloc_lhs = std::make_unique(/*index=*/0, lhs_length, /*color=*/0); BufferAllocation::Slice slice_lhs(alloc_lhs.get(), 0, lhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/1, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_rhs(fake_allocations.back().get(), 0, - rhs_length); + fake_allocations.emplace_back(/*index=*/1, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_rhs(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/2, out_length, /*color=*/0)); - BufferAllocation::Slice slice_out(fake_allocations.back().get(), 0, - out_length); + fake_allocations.emplace_back(/*index=*/2, out_length, /*color=*/0); + BufferAllocation::Slice slice_out(&fake_allocations.back(), 0, out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, 1024 * 1024, /*color=*/0)); - BufferAllocation::Slice slice_workspace(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, /*color=*/0); + BufferAllocation::Slice slice_workspace(&fake_allocations.back(), 0, 1024 * 1024); auto alloc_lhs_offset_0 = std::make_unique( @@ -323,14 +319,13 @@ CreateMultipleSlicedOperandsGemmThunk( int64_t offset_length = sizeof(int64_t); int64_t slice_length = sizeof(float) * 3; // Preparing buffer allocation slices for thunk creations. 
- std::vector> fake_allocations; - fake_allocations.push_back(std::make_unique( - /*index=*/0, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_lhs_fake(fake_allocations.back().get(), 0, + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back(/*index=*/0, slice_length, /*color=*/0); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations.back(), 0, slice_length); - fake_allocations.push_back(std::make_unique( - /*index=*/1, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_rhs_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/1, slice_length, /*color=*/0); + BufferAllocation::Slice slice_rhs_fake(&fake_allocations.back(), 0, slice_length); auto alloc_lhs = std::make_unique(/*index=*/0, length, /*color=*/0); @@ -338,13 +333,10 @@ CreateMultipleSlicedOperandsGemmThunk( auto alloc_rhs = std::make_unique(/*index=*/1, length, /*color=*/0); BufferAllocation::Slice slice_rhs(alloc_rhs.get(), 0, length); - fake_allocations.push_back( - std::make_unique(/*index=*/2, out_length, /*color=*/0)); - BufferAllocation::Slice slice_out(fake_allocations.back().get(), 0, - out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, 1024 * 1024, /*color=*/0)); - BufferAllocation::Slice slice_workspace(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/2, out_length, /*color=*/0); + BufferAllocation::Slice slice_out(&fake_allocations.back(), 0, out_length); + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, /*color=*/0); + BufferAllocation::Slice slice_workspace(&fake_allocations.back(), 0, 1024 * 1024); auto alloc_lhs_offset_0 = std::make_unique( /*index=*/4, offset_length, /*color=*/0); @@ -540,21 +532,19 @@ TEST_F(DynamicSliceThunkTest, SlicedMemcpy) { // Prepare embedded and dynamic slice thunks. // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations(2); + std::vector fake_allocations; + fake_allocations.reserve(2); // Fake slices for embedded thunk creation. - fake_allocations.push_back(std::make_unique( - /*index=*/0, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_src_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/0, slice_length, /*color=*/0); + BufferAllocation::Slice slice_src_fake(&fake_allocations.back(), 0, slice_length); BufferAllocation alloc_src(/*index=*/0, src_length, /*color=*/0); BufferAllocation::Slice slice_src(&alloc_src, 0, src_length); - fake_allocations.push_back( - std::make_unique(/*index=*/1, dst_length, /*color=*/0)); - BufferAllocation::Slice slice_dst(fake_allocations.back().get(), 0, - dst_length); + fake_allocations.emplace_back(/*index=*/1, dst_length, /*color=*/0); + BufferAllocation::Slice slice_dst(&fake_allocations.back(), 0, dst_length); BufferAllocation alloc_offset_0(/*index=*/2, offset_length, /*color=*/0); BufferAllocation::Slice slice_offset_0(&alloc_offset_0, 0, offset_length); @@ -678,17 +668,16 @@ TEST_F(DynamicSliceThunkTest, SlicedOutputMemcpy) { // Prepare embedded and dynamic slice thunks. // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations(2); + std::vector fake_allocations; + fake_allocations.reserve(2); // Fake slices for embedded thunk creation. 
- fake_allocations.push_back(std::make_unique( - /*index=*/0, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_src_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/0, slice_length, /*color=*/0); + BufferAllocation::Slice slice_src_fake(&fake_allocations.back(), 0, slice_length); - fake_allocations.push_back(std::make_unique( - /*index=*/1, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_dst_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/1, slice_length, /*color=*/0); + BufferAllocation::Slice slice_dst_fake(&fake_allocations.back(), 0, slice_length); BufferAllocation alloc_src(/*index=*/0, src_length, /*color=*/0); @@ -868,22 +857,19 @@ CreateSlicedGemmArbitraryArgumentOrderThunk( int64_t offset_length = sizeof(int64_t); // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations; - fake_allocations.push_back( - std::make_unique(/*index=*/0, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_lhs_fake(fake_allocations.back().get(), 0, + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back(/*index=*/0, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/1, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_rhs_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/1, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_rhs_fake(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/2, out_length, /*color=*/0)); - BufferAllocation::Slice slice_out_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/2, out_length, /*color=*/0); + BufferAllocation::Slice slice_out_fake(&fake_allocations.back(), 0, out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, 1024 * 1024, /*color=*/0)); - BufferAllocation::Slice slice_workspace_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, /*color=*/0); + BufferAllocation::Slice slice_workspace_fake(&fake_allocations.back(), 0, 1024 * 1024); auto alloc_lhs = @@ -1043,22 +1029,19 @@ CreateSlicedGemmArbitraryNumberOfArgumentsThunk( int64_t offset_length = sizeof(int64_t); // Preparing buffer allocation slices for thunk creations. 
- std::vector> fake_allocations; - fake_allocations.push_back( - std::make_unique(/*index=*/0, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_lhs_fake(fake_allocations.back().get(), 0, + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back(/*index=*/0, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/1, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_rhs_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/1, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_rhs_fake(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/2, out_length, /*color=*/0)); - BufferAllocation::Slice slice_out_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/2, out_length, /*color=*/0); + BufferAllocation::Slice slice_out_fake(&fake_allocations.back(), 0, out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, 1024 * 1024, /*color=*/0)); - BufferAllocation::Slice slice_workspace_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, /*color=*/0); + BufferAllocation::Slice slice_workspace_fake(&fake_allocations.back(), 0, 1024 * 1024); auto alloc_lhs = @@ -1220,29 +1203,24 @@ CreateSlicedTupledOperandGemmThunk( int64_t offset_length = sizeof(int64_t); // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations; - fake_allocations.push_back( - std::make_unique(/*index=*/0, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_lhs_fake(fake_allocations.back().get(), 0, + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back(/*index=*/0, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations.back(), 0, rhs_length); auto alloc_lhs = std::make_unique( /*index=*/0, 3 * lhs_length, /*color=*/0); BufferAllocation::Slice slice_lhs(alloc_lhs.get(), lhs_length, lhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/1, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_rhs(fake_allocations.back().get(), 0, - rhs_length); + fake_allocations.emplace_back(/*index=*/1, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_rhs(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/2, out_length, /*color=*/0)); - BufferAllocation::Slice slice_out(fake_allocations.back().get(), 0, - out_length); + fake_allocations.emplace_back(/*index=*/2, out_length, /*color=*/0); + BufferAllocation::Slice slice_out(&fake_allocations.back(), 0, out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, 1024 * 1024, /*color=*/0)); - BufferAllocation::Slice slice_workspace(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, /*color=*/0); + BufferAllocation::Slice slice_workspace(&fake_allocations.back(), 0, 1024 * 1024); auto alloc_lhs_offset_0 = std::make_unique( @@ -1398,17 +1376,16 @@ TEST_F(DynamicSliceThunkTest, SlicedMemcpyOOB) { // Prepare embedded and dynamic slice thunks. // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations(2); + std::vector fake_allocations; + fake_allocations.reserve(2); // Fake slices for embedded thunk creation. 
- fake_allocations.push_back(std::make_unique( - /*index=*/0, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_src_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/0, slice_length, /*color=*/0); + BufferAllocation::Slice slice_src_fake(&fake_allocations.back(), 0, slice_length); - fake_allocations.push_back(std::make_unique( - /*index=*/1, slice_length, /*color=*/0)); - BufferAllocation::Slice slice_dst_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/1, slice_length, /*color=*/0); + BufferAllocation::Slice slice_dst_fake(&fake_allocations.back(), 0, slice_length); BufferAllocation alloc_src(/*index=*/0, src_length, /*color=*/0); @@ -1591,25 +1568,22 @@ CreateSlicedOperandsSameBufferGemmThunk( int64_t offset_length = sizeof(int64_t); // Preparing buffer allocation slices for thunk creations. - std::vector> fake_allocations; - fake_allocations.push_back( - std::make_unique(/*index=*/0, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_lhs_fake(fake_allocations.back().get(), 0, + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back(/*index=*/0, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_lhs_fake(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/1, rhs_length, /*color=*/0)); - BufferAllocation::Slice slice_rhs_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/1, rhs_length, /*color=*/0); + BufferAllocation::Slice slice_rhs_fake(&fake_allocations.back(), 0, rhs_length); - fake_allocations.push_back( - std::make_unique(/*index=*/2, out_length, /*color=*/0)); - BufferAllocation::Slice slice_out_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/2, out_length, /*color=*/0); + BufferAllocation::Slice slice_out_fake(&fake_allocations.back(), 0, out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, 1024 * 1024, /*color=*/0)); - BufferAllocation::Slice slice_workspace_fake(fake_allocations.back().get(), 0, + fake_allocations.emplace_back(/*index=*/3, 1024 * 1024, /*color=*/0); + BufferAllocation::Slice slice_workspace_fake(&fake_allocations.back(), 0, 1024 * 1024); auto alloc = std::make_unique( @@ -1804,11 +1778,11 @@ CreateHostInductionVariableAndOffsetEvaluationThunk( int64_t out_length = sizeof(float) * 1 * 1; // Preparing buffer allocation slices for thunk creations. 
- std::vector> fake_allocations; - fake_allocations.push_back(std::make_unique( - /*index=*/0, /*size=*/rhs_length, /*color=*/0)); + std::vector fake_allocations; + fake_allocations.reserve(4); + fake_allocations.emplace_back(/*index=*/0, /*size=*/rhs_length, /*color=*/0); BufferAllocation::Slice slice_lhs_fake( - /*allocation=*/fake_allocations.back().get(), /*offset=*/0, + /*allocation=*/&fake_allocations.back(), /*offset=*/0, /*size=*/rhs_length); auto alloc_lhs = std::make_unique( @@ -1816,22 +1790,19 @@ CreateHostInductionVariableAndOffsetEvaluationThunk( BufferAllocation::Slice slice_lhs(alloc_lhs.get(), /*offset=*/0, /*size=*/lhs_length); - fake_allocations.push_back(std::make_unique( - /*index=*/1, /*size=*/rhs_length, /*color=*/0)); + fake_allocations.emplace_back(/*index=*/1, /*size=*/rhs_length, /*color=*/0); BufferAllocation::Slice slice_rhs( - /*allocation=*/fake_allocations.back().get(), /*offset=*/0, + /*allocation=*/&fake_allocations.back(), /*offset=*/0, /*size=*/rhs_length); - fake_allocations.push_back(std::make_unique( - /*index=*/2, /*size=*/out_length, /*color=*/0)); + fake_allocations.emplace_back(/*index=*/2, /*size=*/out_length, /*color=*/0); BufferAllocation::Slice slice_out( - /*allocation=*/fake_allocations.back().get(), /*offset=*/0, + /*allocation=*/&fake_allocations.back(), /*offset=*/0, /*size=*/out_length); - fake_allocations.push_back(std::make_unique( - /*index=*/3, /*size=*/1024 * 1024, /*color=*/0)); + fake_allocations.emplace_back(/*index=*/3, /*size=*/1024 * 1024, /*color=*/0); BufferAllocation::Slice slice_workspace( - /*allocation=*/fake_allocations.back().get(), /*offset=*/0, + /*allocation=*/&fake_allocations.back(), /*offset=*/0, /*size=*/1024 * 1024); backing_allocations.push_back(std::move(alloc_lhs));