#pragma once #include namespace torch { namespace jit { namespace fuser { namespace cuda { //! Types of debug print-outs //! //! These can be set through the `PYTORCH_NVFUSER_DUMP` environment variable //! enum class DebugDumpOption { FusionIr, //!< Dump the Fusion IR before lowering FusionIrMath, //!< Dump just the compute (math) part of the Fusion IR KernelIr, //!< Dump the compiler Kernel IR CudaKernel, //!< Dump the generated CUDA C++ kernel code CudaFull, //!< Dump the complete CUDA C++ code CudaToFile, //!< Dump CUDA Strings to File LaunchParam, //!< Dump the Launch parameters of kernel FusionSegments, //!< Dump Segmented Fusion Graph PrintRuntimeArgs, //!< Print the runtime arguments when launching kernels EffectiveBandwidth, //! Measure kernel performance and print effective //! bandwidth FusionSegmentsDrawing, //!< Dump Segmented Fusion Graph PrintPtxasLog, //!< Print the ptxas verbose log including register usage SchedulerDebug, //! Dump scheduler heuristic parameters ParallelDimensions //!< Dump known parallel dimensions }; bool isDebugDumpEnabled(DebugDumpOption option); // Check if fallback path should be used which will dispatch to eagermode if any // errors are encountered. Helpful for debugging. bool useFallback(); // Returns if unrolling should not be used for kernels with RNG in them. bool disableRNGUnrolling(); //! Ceil integer division constexpr int64_t ceilDiv(int64_t a, int64_t b) { return (a + b - 1) / b; } //! Simple mixin for suppressing copy & move operations, ex: //! //! class Foo : public NonCopyable { //! ... //! }; //! class NonCopyable { public: NonCopyable() = default; // No copy/move semantics NonCopyable(const NonCopyable&) = delete; NonCopyable& operator=(const NonCopyable&) = delete; }; //! A generic root for a hierarchy of polymorphic classes: //! - It ensures virtual destructors //! - Provides the base->as() and node->isA() notation class PolymorphicBase { public: virtual ~PolymorphicBase() = default; // Replacement for static_cast(ptr): ptr->as() // (checked in DEBUG builds) template T* as() { #ifdef NDEBUG auto downcast_ptr = static_cast(this); #else auto downcast_ptr = dynamic_cast(this); TORCH_INTERNAL_ASSERT(downcast_ptr != nullptr); #endif return downcast_ptr; } template const T* as() const { #ifdef NDEBUG auto downcast_ptr = static_cast(this); #else auto downcast_ptr = dynamic_cast(this); TORCH_INTERNAL_ASSERT(downcast_ptr != nullptr); #endif return downcast_ptr; } //! Check if the runtime time is T (or derived from T) //! //! \note Don't use this for conditional casts. Instead, use: //! //! if (auto t = dynamic_cast(p)) { ... } //! //! instead of: //! //! if (p->isA()) { auto t = p->as(); ... } //! template bool isA() const { return dynamic_cast(this) != nullptr; } }; } // namespace cuda } // namespace fuser } // namespace jit } // namespace torch