pytorch/torch/csrc/jit/codegen/cuda/utils.h

#pragma once
#include <c10/util/Exception.h>
namespace torch {
namespace jit {
namespace fuser {
namespace cuda {
//! Types of debug print-outs
//!
//! These can be set through the `PYTORCH_NVFUSER_DUMP` environment variable
//!
enum class DebugDumpOption {
  FusionIr, //!< Dump the Fusion IR before lowering
  FusionIrMath, //!< Dump just the compute (math) part of the Fusion IR
  KernelIr, //!< Dump the compiler Kernel IR
  CudaKernel, //!< Dump the generated CUDA C++ kernel code
  CudaFull, //!< Dump the complete CUDA C++ code
  CudaToFile, //!< Dump CUDA strings to file
  LaunchParam, //!< Dump the launch parameters of the kernel
  FusionSegments, //!< Dump the segmented Fusion graph
  PrintRuntimeArgs, //!< Print the runtime arguments when launching kernels
  EffectiveBandwidth, //!< Measure kernel performance and print effective
                      //!< bandwidth
  FusionSegmentsDrawing, //!< Dump a drawing of the segmented Fusion graph
  PrintPtxasLog, //!< Print the ptxas verbose log including register usage
  SchedulerDebug, //!< Dump scheduler heuristic parameters
  ParallelDimensions //!< Dump known parallel dimensions
};
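//! Returns true if the given dump option was requested through the
//! PYTORCH_NVFUSER_DUMP environment variable.
//!
//! Usage sketch (illustrative only; the exact option-name strings accepted in
//! PYTORCH_NVFUSER_DUMP are parsed elsewhere, so the value shown below is an
//! assumption):
//!
//!   // Run with e.g.: PYTORCH_NVFUSER_DUMP="cuda_kernel" python script.py
//!   if (isDebugDumpEnabled(DebugDumpOption::CudaKernel)) {
//!     std::cout << kernel_code_string << std::endl; // hypothetical string
//!   }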
bool isDebugDumpEnabled(DebugDumpOption option);
// Check whether the fallback path should be used; it dispatches to eager mode
// if any errors are encountered. Helpful for debugging.
bool useFallback();

// Returns true if unrolling should not be used for kernels containing RNG ops.
bool disableRNGUnrolling();
//! Ceil integer division
constexpr int64_t ceilDiv(int64_t a, int64_t b) {
  return (a + b - 1) / b;
}
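//! For example, ceilDiv(10, 4) == 3, while plain integer division gives
//! 10 / 4 == 2 (assuming non-negative a and positive b). A typical,
//! illustrative use is sizing a launch grid:
//!
//!   const int64_t num_blocks = ceilDiv(num_elements, threads_per_block);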
//! Simple mixin for suppressing copy & move operations, ex:
//!
//! class Foo : public NonCopyable {
//! ...
//! };
//!
class NonCopyable {
 public:
  NonCopyable() = default;

  // No copy/move semantics
  NonCopyable(const NonCopyable&) = delete;
  NonCopyable& operator=(const NonCopyable&) = delete;
};
//! A generic root for a hierarchy of polymorphic classes:
//! - It ensures virtual destructors
//! - Provides the base->as<Derived>() and node->isA<T>() notation
class PolymorphicBase {
 public:
  virtual ~PolymorphicBase() = default;

  // Replacement for static_cast<T*>(ptr): ptr->as<T>()
  // (checked in DEBUG builds)
  template <class T>
  T* as() {
#ifdef NDEBUG
    auto downcast_ptr = static_cast<T*>(this);
#else
    auto downcast_ptr = dynamic_cast<T*>(this);
    TORCH_INTERNAL_ASSERT(downcast_ptr != nullptr);
#endif
    return downcast_ptr;
  }

  template <class T>
  const T* as() const {
#ifdef NDEBUG
    auto downcast_ptr = static_cast<const T*>(this);
#else
    auto downcast_ptr = dynamic_cast<const T*>(this);
    TORCH_INTERNAL_ASSERT(downcast_ptr != nullptr);
#endif
    return downcast_ptr;
  }
  //! Check if the runtime type is T (or derived from T)
  //!
  //! \note Don't use this for conditional casts. Instead, use:
  //!
  //!  if (auto t = dynamic_cast<T>(p)) { ... }
  //!
  //! instead of:
  //!
  //!  if (p->isA<T>()) { auto t = p->as<T>(); ... }
  //!
  template <class T>
  bool isA() const {
    return dynamic_cast<const T*>(this) != nullptr;
  }
};
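//! Usage sketch for PolymorphicBase (Stmt and Expr are hypothetical classes
//! used only for illustration):
//!
//!   class Stmt : public PolymorphicBase {};
//!   class Expr : public Stmt {};
//!
//!   void handle(Stmt* stmt) {
//!     // Conditional cast: prefer dynamic_cast, per the note on isA() above
//!     if (auto* expr = dynamic_cast<Expr*>(stmt)) {
//!       // ...
//!     }
//!     // Unconditional downcast when the dynamic type is already known;
//!     // the cast is verified with an assert in debug (non-NDEBUG) builds
//!     auto* expr2 = stmt->as<Expr>();
//!   }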
} // namespace cuda
} // namespace fuser
} // namespace jit
} // namespace torch