mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 12:20:11 +01:00
[XLA:GPU] Fix race condition in gpu_compiler.cc.
We were racing on libdevice_dir_. PiperOrigin-RevId: 174070334
This commit is contained in:
parent
35939d2d37
commit
b5d5326c62
|
|
@ -94,15 +94,13 @@ using tensorflow::strings::StrCat;
|
|||
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#device-memory-accesses
|
||||
constexpr int64 kMemoryAlignment = 256;
|
||||
|
||||
// Returns the directory containing nvvm libdevice files. This function is
|
||||
// called in GpuCompiler's constructor, so can't return an error. But
|
||||
// GpuCompiler::Compile will return an error when the wanted libdevice file
|
||||
// doesn't exist in the folder this function returns.
|
||||
string GetLibdeviceDir(const HloModuleConfig& config) {
|
||||
// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
|
||||
// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
|
||||
// HloModule being compiled.
|
||||
string GetLibdeviceDir(const string& config_cuda_data_dir) {
|
||||
std::vector<string> potential_libdevice_dirs;
|
||||
const string datadir = config.debug_options().xla_gpu_cuda_data_dir();
|
||||
if (!datadir.empty()) {
|
||||
potential_libdevice_dirs.push_back(datadir);
|
||||
if (!config_cuda_data_dir.empty()) {
|
||||
potential_libdevice_dirs.push_back(config_cuda_data_dir);
|
||||
}
|
||||
potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
|
||||
|
||||
|
|
@ -359,12 +357,26 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
|
|||
/*optimized=*/false));
|
||||
}
|
||||
|
||||
// Reserve space for the PTX to be generated for this module.
|
||||
string* ptx;
|
||||
string libdevice_dir;
|
||||
{
|
||||
tensorflow::mutex_lock lock(mutex_);
|
||||
|
||||
// Reserve space for the PTX to be generated for this module.
|
||||
generated_ptxes_.emplace_back(MakeUnique<string>());
|
||||
ptx = generated_ptxes_.back().get();
|
||||
|
||||
// Find the directory containing libdevice. To avoid searching for it every
|
||||
// time, we have a one-element cache, keyed on the module's config's
|
||||
// cuda_data_dir.
|
||||
const auto& config_cuda_data_dir =
|
||||
module->config().debug_options().xla_gpu_cuda_data_dir();
|
||||
if (cached_libdevice_dir_.empty() ||
|
||||
cached_cuda_data_dir_ != config_cuda_data_dir) {
|
||||
cached_cuda_data_dir_ = config_cuda_data_dir;
|
||||
cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
|
||||
}
|
||||
libdevice_dir = cached_libdevice_dir_;
|
||||
}
|
||||
int cc_major, cc_minor;
|
||||
if (!stream_exec->GetDeviceDescription().cuda_compute_capability(&cc_major,
|
||||
|
|
@ -374,12 +386,9 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
|
|||
cc_major = 2;
|
||||
cc_minor = 0;
|
||||
}
|
||||
if (libdevice_dir_.empty()) {
|
||||
// Compute libdevice_dir_ just once and cache it in this member.
|
||||
libdevice_dir_ = GetLibdeviceDir(module->config());
|
||||
}
|
||||
|
||||
TF_ASSIGN_OR_RETURN(*ptx, CompileToPtx(&llvm_module, {cc_major, cc_minor},
|
||||
module->config(), libdevice_dir_));
|
||||
module->config(), libdevice_dir));
|
||||
|
||||
if (!ir_dump_directory.empty()) {
|
||||
TF_RETURN_IF_ERROR(llvm_ir::DumpIRToDirectory(
|
||||
|
|
|
|||
|
|
@ -71,18 +71,26 @@ class GpuCompiler : public LLVMCompiler {
|
|||
static const char* kDataLayout;
|
||||
|
||||
private:
|
||||
// The parent directory of libdevice IR libraries.
|
||||
string libdevice_dir_;
|
||||
// The size in bytes of a pointer. Used by ShapeSizeBytesFunction.
|
||||
const int64 pointer_size_;
|
||||
|
||||
tensorflow::mutex mutex_;
|
||||
|
||||
// When compiling an HLO module, we need to find a path to the nvvm libdevice
|
||||
// files. We search in the module's config.debug_options().cuda_data_dir()
|
||||
// and in tensorflow::LibdeviceRoot(), the latter of which is a constant.
|
||||
//
|
||||
// We cache the cuda_data_dir() and the result of our search, so that if the
|
||||
// next module we have to compile has the same cuda_data_dir(), we can skip
|
||||
// the search.
|
||||
string cached_cuda_data_dir_ GUARDED_BY(mutex_);
|
||||
string cached_libdevice_dir_ GUARDED_BY(mutex_);
|
||||
|
||||
// The list of PTX strings generated by this GpuCompiler. We let GpuCompiler
|
||||
// own them because they need to be alive across the life span of the
|
||||
// StreamExecutor (b/24776264).
|
||||
tensorflow::mutex mutex_;
|
||||
std::vector<std::unique_ptr<string>> generated_ptxes_ GUARDED_BY(mutex_);
|
||||
|
||||
// The size in bytes of a pointer. Used by ShapeSizeBytesFunction.
|
||||
int64 pointer_size_;
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(GpuCompiler);
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user