mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: 1. Added CudaFusionGuard as the custom TypeCheck for nvfuser; enabled dynamic shape support with profiling executor; 2. dropped support for legacy fuser; 3. re-enabled nvfuser tests; 4. added registration for profiling record to allow profiling on user specified nodes. Pull Request resolved: https://github.com/pytorch/pytorch/pull/46452 Reviewed By: zou3519, anjali411 Differential Revision: D24364642 Pulled By: ngimel fbshipit-source-id: daf53a9a6b6636e1ede420a3a6d0397d4a8b450b
24 lines
516 B
C++
24 lines
516 B
C++
#pragma once
|
|
|
|
#include <torch/csrc/WindowsTorchApiMacro.h>
|
|
#include <torch/csrc/jit/codegen/cuda/kernel.h>
|
|
|
|
#include <string>
|
|
|
|
namespace torch {
|
|
namespace jit {
|
|
namespace fuser {
|
|
namespace cuda {
|
|
namespace codegen {
|
|
|
|
//! Generates a CUDA kernel definition for the given kernel
|
|
TORCH_CUDA_API std::string generateCudaKernel(
|
|
const Kernel* kernel,
|
|
const std::string& kernel_name = "CUDAGeneratedKernel");
|
|
|
|
} // namespace codegen
|
|
} // namespace cuda
|
|
} // namespace fuser
|
|
} // namespace jit
|
|
} // namespace torch
|