[XLA:GPU] Fix race condition in gpu_compiler.cc.

We were racing on libdevice_dir_.

PiperOrigin-RevId: 174070334
Justin Lebar 2017-10-31 11:59:56 -07:00 committed by TensorFlower Gardener
parent 35939d2d37
commit b5d5326c62
2 changed files with 37 additions and 20 deletions
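
The race is easiest to see outside the compiler: a member lazily initialized from Compile() with no lock means two concurrent Compile() calls perform a check-then-write on the same string. The sketch below is illustrative only (hypothetical RacyCompiler class and hard-coded path, not the XLA code); ThreadSanitizer reports the concurrent writes as a data race.

#include <string>
#include <thread>

class RacyCompiler {
 public:
  void Compile() {
    if (libdevice_dir_.empty()) {                         // unsynchronized read
      libdevice_dir_ = "/usr/local/cuda/nvvm/libdevice";  // unsynchronized write
    }
    // ... libdevice_dir_ is then used while another thread may still be writing it.
  }

 private:
  std::string libdevice_dir_;  // nothing guards this member
};

int main() {
  RacyCompiler compiler;
  std::thread t1([&] { compiler.Compile(); });
  std::thread t2([&] { compiler.Compile(); });
  t1.join();
  t2.join();
}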

tensorflow/compiler/xla/service/gpu/gpu_compiler.cc

@@ -94,15 +94,13 @@ using tensorflow::strings::StrCat;
 // http://docs.nvidia.com/cuda/cuda-c-programming-guide/#device-memory-accesses
 constexpr int64 kMemoryAlignment = 256;
-// Returns the directory containing nvvm libdevice files. This function is
-// called in GpuCompiler's constructor, so can't return an error. But
-// GpuCompiler::Compile will return an error when the wanted libdevice file
-// doesn't exist in the folder this function returns.
-string GetLibdeviceDir(const HloModuleConfig& config) {
+// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
+// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
+// HloModule being compiled.
+string GetLibdeviceDir(const string& config_cuda_data_dir) {
   std::vector<string> potential_libdevice_dirs;
-  const string datadir = config.debug_options().xla_gpu_cuda_data_dir();
-  if (!datadir.empty()) {
-    potential_libdevice_dirs.push_back(datadir);
+  if (!config_cuda_data_dir.empty()) {
+    potential_libdevice_dirs.push_back(config_cuda_data_dir);
   }
   potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
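
For context, here is a standalone sketch of the kind of search GetLibdeviceDir performs: try the module's cuda_data_dir first, then a default CUDA install location, and return the first directory holding a file named like libdevice*. The std::filesystem dependency, the hard-coded default path, and the FindLibdeviceDir name are assumptions for illustration, not the real helper (which also consults tensorflow::LibdeviceRoot()).

#include <filesystem>
#include <string>
#include <vector>

std::string FindLibdeviceDir(const std::string& cuda_data_dir) {
  std::vector<std::string> candidates;
  if (!cuda_data_dir.empty()) candidates.push_back(cuda_data_dir);
  candidates.push_back("/usr/local/cuda/nvvm/libdevice");  // assumed default path

  for (const std::string& dir : candidates) {
    std::error_code ec;  // a missing directory yields an empty iteration, not a throw
    for (const auto& entry : std::filesystem::directory_iterator(dir, ec)) {
      if (entry.path().filename().string().rfind("libdevice", 0) == 0) {
        return dir;  // this directory contains a libdevice file
      }
    }
  }
  return "";  // the caller reports the error when compilation actually needs libdevice
}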
@@ -359,12 +357,26 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
         /*optimized=*/false));
   }
-  // Reserve space for the PTX to be generated for this module.
   string* ptx;
+  string libdevice_dir;
   {
     tensorflow::mutex_lock lock(mutex_);
+    // Reserve space for the PTX to be generated for this module.
     generated_ptxes_.emplace_back(MakeUnique<string>());
     ptx = generated_ptxes_.back().get();
+    // Find the directory containing libdevice. To avoid searching for it every
+    // time, we have a one-element cache, keyed on the module's config's
+    // cuda_data_dir.
+    const auto& config_cuda_data_dir =
+        module->config().debug_options().xla_gpu_cuda_data_dir();
+    if (cached_libdevice_dir_.empty() ||
+        cached_cuda_data_dir_ != config_cuda_data_dir) {
+      cached_cuda_data_dir_ = config_cuda_data_dir;
+      cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
+    }
+    libdevice_dir = cached_libdevice_dir_;
   }
   int cc_major, cc_minor;
   if (!stream_exec->GetDeviceDescription().cuda_compute_capability(&cc_major,
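
The fix in this hunk is a one-element cache whose key and value are only read or written with mutex_ held. A minimal standalone sketch of the same pattern, using a plain std::mutex and invented names rather than the GpuCompiler members:

#include <mutex>
#include <string>

class LibdeviceDirCache {
 public:
  std::string GetLibdeviceDir(const std::string& cuda_data_dir) {
    std::lock_guard<std::mutex> lock(mu_);
    if (cached_libdevice_dir_.empty() ||
        cached_cuda_data_dir_ != cuda_data_dir) {
      cached_cuda_data_dir_ = cuda_data_dir;
      cached_libdevice_dir_ = Search(cuda_data_dir);
    }
    return cached_libdevice_dir_;  // copied before the lock is released
  }

 private:
  // Stand-in for the real directory search.
  static std::string Search(const std::string& cuda_data_dir) {
    return cuda_data_dir.empty() ? "/usr/local/cuda/nvvm/libdevice"
                                 : cuda_data_dir;
  }

  std::mutex mu_;                     // guards both members below
  std::string cached_cuda_data_dir_;  // cache key
  std::string cached_libdevice_dir_;  // cache value
};

Returning (and, in the hunk above, assigning to the local libdevice_dir) by value while the lock is still held matters: once the lock is released, another thread compiling a module with a different cuda_data_dir may overwrite the cached strings, so each caller keeps its own copy.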
@@ -374,12 +386,9 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::Compile(
     cc_major = 2;
     cc_minor = 0;
   }
-  if (libdevice_dir_.empty()) {
-    // Compute libdevice_dir_ just once and cache it in this member.
-    libdevice_dir_ = GetLibdeviceDir(module->config());
-  }
   TF_ASSIGN_OR_RETURN(*ptx, CompileToPtx(&llvm_module, {cc_major, cc_minor},
-                                         module->config(), libdevice_dir_));
+                                         module->config(), libdevice_dir));
   if (!ir_dump_directory.empty()) {
     TF_RETURN_IF_ERROR(llvm_ir::DumpIRToDirectory(

tensorflow/compiler/xla/service/gpu/gpu_compiler.h

@@ -71,18 +71,26 @@ class GpuCompiler : public LLVMCompiler {
   static const char* kDataLayout;
  private:
-  // The parent directory of libdevice IR libraries.
-  string libdevice_dir_;
+  // The size in bytes of a pointer. Used by ShapeSizeBytesFunction.
+  const int64 pointer_size_;
+  tensorflow::mutex mutex_;
+  // When compiling an HLO module, we need to find a path to the nvvm libdevice
+  // files. We search in the module's config.debug_options().cuda_data_dir()
+  // and in tensorflow::LibdeviceRoot(), the latter of which is a constant.
+  //
+  // We cache the cuda_data_dir() and the result of our search, so that if the
+  // next module we have to compile has the same cuda_data_dir(), we can skip
+  // the search.
+  string cached_cuda_data_dir_ GUARDED_BY(mutex_);
+  string cached_libdevice_dir_ GUARDED_BY(mutex_);
   // The list of PTX strings generated by this GpuCompiler. We let GpuCompiler
   // to own them because they need to be alive across the life span of the
   // StreamExecutor (b/24776264).
-  tensorflow::mutex mutex_;
   std::vector<std::unique_ptr<string>> generated_ptxes_ GUARDED_BY(mutex_);
-  // The size in bytes of a pointer. Used by ShapeSizeBytesFunction.
-  int64 pointer_size_;
   TF_DISALLOW_COPY_AND_ASSIGN(GpuCompiler);
 };
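
The new members are declared GUARDED_BY(mutex_), which feeds Clang's -Wthread-safety analysis (TensorFlow's macro expands to the guarded_by attribute). A self-contained sketch of what that annotation buys, written with the raw Clang attributes and an invented Mutex wrapper and Cache class rather than TensorFlow's types; compile with clang -Wthread-safety -c to see the warning.

#include <mutex>
#include <string>

// Annotated wrapper so the analysis knows this type is a lockable capability.
class __attribute__((capability("mutex"))) Mutex {
 public:
  void Lock() __attribute__((acquire_capability())) { m_.lock(); }
  void Unlock() __attribute__((release_capability())) { m_.unlock(); }

 private:
  std::mutex m_;
};

class Cache {
 public:
  std::string Get() {
    mu_.Lock();
    std::string copy = dir_;  // OK: mu_ is held here.
    mu_.Unlock();
    return copy;
  }

  std::string GetUnlocked() {
    return dir_;  // warning: reading variable 'dir_' requires holding mutex 'mu_'
  }

 private:
  Mutex mu_;
  std::string dir_ __attribute__((guarded_by(mu_)));  // role of cached_libdevice_dir_
};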