Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00.
In almost all cases this is only included for writing the output formatter, which only uses `std::ostream` so including `<ostream>` is sufficient. The istream header is ~1000 lines so the difference is non-trivial. Pull Request resolved: https://github.com/pytorch/pytorch/pull/106914 Approved by: https://github.com/lezcano
29 lines
795 B
C++
#pragma once

#include <torch/csrc/Export.h>
#include <torch/csrc/jit/codegen/fuser/arg_spec.h>
#include <torch/csrc/jit/codegen/fuser/partition_desc.h>
#include <torch/csrc/jit/codegen/fuser/tensor_desc.h>
#include <torch/csrc/jit/ir/ir.h>

#include <string>
#include <utility> // std::pair, used in the generateKernel signature below
#include <vector>

namespace torch {
namespace jit {
namespace fuser {

// Creates a CPU or CUDA kernel for the given graph.
// Returns the C++ or CUDA string implementing the kernel.
//
// name:     identifier for the generated kernel function.
// graph:    the fused subgraph to generate code for.
// inputs:   one entry per kernel input: the IR Value and an optional
//           tensor descriptor (presumably nullopt for scalar inputs —
//           confirm against callers).
// outputs:  one entry per kernel output: the IR Value and its
//           tensor descriptor.
// use_cuda: when true, emit CUDA source; otherwise emit C++ for CPU.
TORCH_API std::string generateKernel(
    const std::string& name,
    const Graph& graph,
    const std::vector<std::pair<const Value*, const c10::optional<TensorDesc>>>&
        inputs,
    const std::vector<std::pair<const Value*, const TensorDesc>>& outputs,
    const bool use_cuda);

} // namespace fuser
} // namespace jit
} // namespace torch