Revert "[Reland] fix missing-prototypes warnings in torch_cpu (Part 4) (#101949)"

This reverts commit 4f2c007a1b.

Reverted https://github.com/pytorch/pytorch/pull/101949 on behalf of https://github.com/osalpekar: as noted in @izaitsevfb's comment, we are still seeing linker errors, this time due to `nnc_prepacked_linear_clamp_run` being made a static function. ([comment](https://github.com/pytorch/pytorch/pull/101949#issuecomment-1560226880))
PyTorch MergeBot 2023-05-23 22:53:47 +00:00
parent 45a8f691ec
commit 32ce06a5ab
138 changed files with 772 additions and 572 deletions
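
A note on the failure mode cited above: marking a function `static` gives it internal linkage, so its symbol is no longer exported from the object file, and any other translation unit that still declares and calls it fails at link time. A minimal two-file sketch of that error class (simplified, hypothetical signature; not the actual torch_cpu sources):

// kernel.cpp -- the definition. With `static`, the symbol has internal
// linkage and is invisible to other translation units.
static int nnc_prepacked_linear_clamp_run(int x) { // hypothetical signature
  return x + 1;
}

// caller.cpp -- a separate translation unit still expects external linkage.
int nnc_prepacked_linear_clamp_run(int x); // declaration only

int main() {
  // Links only if the definition above has external linkage; with `static`,
  // the linker reports an undefined reference to
  // nnc_prepacked_linear_clamp_run, the kind of error that motivated this
  // revert.
  return nnc_prepacked_linear_clamp_run(41);
}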

View File

@ -1598,7 +1598,6 @@ TORCH_COPTS = COMMON_COPTS + [
"-fvisibility-inlines-hidden",
"-fno-math-errno ",
"-fno-trapping-math",
"-Wno-error=unused-function",
]
torch_sources = {
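
The TORCH_COPTS hunk above touches "-Wno-error=unused-function". That flag appears to go hand in hand with the `static` changes in the rest of the diff: once a function is given internal linkage and nothing in its own translation unit calls it, GCC/Clang emit -Wunused-function, which -Werror builds turn into a hard error unless the warning is downgraded. A hedged sketch of the pattern (function names are made up for illustration):

// helpers.cpp
// External linkage: the compiler assumes another TU may call it, so no
// unused-function warning is emitted even if nothing here uses it.
int helper_used_elsewhere(int x) { return x * 2; }

// Internal linkage: if nothing in this TU calls it, -Wunused-function fires
// ("unused function 'helper_now_static'"); under -Werror that breaks the
// build unless -Wno-error=unused-function is passed.
static int helper_now_static(int x) { return x * 2; }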

View File

@ -763,7 +763,7 @@ IValueComparator getGreaterThanComparator(const IValue& v) {
};
}
std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
static std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
out << v.qualifiedClassName() << "." << v.name();
return out;
}

View File

@ -1628,7 +1628,7 @@ struct ivalue::EnumHolder : c10::intrusive_ptr_target {
TORCH_API friend std::ostream& operator<<(
std::ostream& out,
const ivalue::EnumHolder& v);
const EnumHolder& v);
TORCH_API const std::string qualifiedClassName() const;

View File

@ -405,7 +405,7 @@ static std::tuple<Tensor,optional<int64_t>> searchsorted_batch_rule(
TORCH_INTERNAL_ASSERT(false);
}
static Tensor bucketize_decomp_Tensor(
Tensor bucketize_decomp_Tensor(
const Tensor& self,
const Tensor& boundaries,
bool out_int32,
@ -415,7 +415,7 @@ static Tensor bucketize_decomp_Tensor(
return at::searchsorted(boundaries, self, out_int32, right, nullopt, nullopt);
}
static Tensor bucketize_decomp_Scalar(
Tensor bucketize_decomp_Scalar(
const Scalar& self,
const Tensor& boundaries,
bool out_int32,

View File

@ -374,8 +374,8 @@ TORCH_IMPL_FUNC(softshrink_backward_out) (
shrink_backward_stub(device_type(), *this, lambd);
}
#if AT_MKLDNN_ENABLED()
static bool use_mkldnn(const Tensor& input) {
#if AT_MKLDNN_ENABLED()
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}
@ -386,8 +386,9 @@ static bool use_mkldnn(const Tensor& input) {
(input.device().is_cpu() &&
(((input.scalar_type() == kBFloat16) && mkldnn_bf16_device_check()) ||
(input.scalar_type() == kFloat))); // input is dense layout and bfloat16/float32
}
#endif
return false;
}
TORCH_IMPL_FUNC(gelu_out_cpu) (
const Tensor& self, c10::string_view approximate, const Tensor& result

View File

@ -809,7 +809,7 @@ Tensor& arctan2_out(const Tensor& self, const Tensor& other, Tensor& result) {
return at::atan2_out(result, self, other);
}
static Tensor& add_relu_impl(
Tensor& add_relu_impl(
Tensor& result, const Tensor& self, const Tensor& other, const Scalar& alpha) {
auto iter = TensorIterator::binary_op(result, self, other);
Scalar min_val;
@ -1003,7 +1003,7 @@ Tensor& mul__scalar_sparse_csr(Tensor& self, const Scalar& other) {
return self;
}
static Device correct_out_device(const Tensor& self, const Tensor& other) {
Device correct_out_device(const Tensor& self, const Tensor& other) {
if (self.device() == at::kCPU){
return other.device();
} else {
@ -1049,7 +1049,7 @@ Tensor div_zerotensor(const Tensor& self, const Tensor& other) {
}
}
static Tensor maybe_add_maybe_sub(const Tensor& self, const Tensor& other, const Scalar& alpha) {
Tensor maybe_add_maybe_sub(const Tensor& self, const Tensor& other, const Scalar& alpha) {
auto out_device = correct_out_device(self, other);
// hack to use the TensorIterator to get the correct broadcasting and type promotion logic
auto device_ = Device(DeviceType::Meta);

View File

@ -770,7 +770,6 @@ static void check_input_same_type_as_parameters(
check_input_same_type_as_parameters(input, weight, /*bias=*/ Tensor());
}
#if AT_MKLDNN_ENABLED()
static void check_input_same_type_as_parameters(
const Tensor& input,
const Tensor& weight,
@ -789,7 +788,6 @@ static void check_input_same_type_as_parameters(
check_input_same_type_as_parameters(input, weight, bias);
}
}
#endif
static auto view4d(const at::Tensor& tensor) -> at::Tensor {
TORCH_CHECK(tensor.ndimension() == 3,

View File

@ -21,7 +21,6 @@
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_copy_from.h>
#include <ATen/ops/_propagate_xla_data.h>
#include <ATen/ops/copy_native.h>
#include <ATen/ops/empty.h>
#include <ATen/ops/expand_copy.h>

View File

@ -3,11 +3,6 @@
#include <ATen/WrapDimUtils.h>
#include <ATen/LegacyVmapTransforms.h>
#ifdef AT_PER_OPERATOR_HEADERS
#include <ATen/ops/_add_batch_dim_native.h>
#include <ATen/ops/_remove_batch_dim_native.h>
#endif
namespace at { namespace native {
// Adds a batch dimension to the tensor `self` out-of-place

View File

@ -1893,7 +1893,7 @@ The behavior depends on the dimensionality of the Tensors as follows:
- Otherwise, we return bmm, after broadcasting and folding the batched dimensions if
there's more than one
*/
static Tensor _matmul_impl(
Tensor _matmul_impl(
Tensor& out,
const Tensor& tensor1,
const Tensor& tensor2) {

View File

@ -20,7 +20,7 @@
namespace at { namespace native {
static void checkLongTensor(const Tensor& tensor) {
void checkLongTensor(const Tensor& tensor) {
TORCH_CHECK(tensor.dim() == 1 && tensor.device().type() == at::kCPU && tensor.scalar_type() == at::kLong,
"'lengths' argument should be a 1D CPU int64 tensor, but got ",
tensor.dim(), "D ", tensor.device().str(), " ", tensor.scalar_type(), " tensor");

View File

@ -1809,7 +1809,7 @@ std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data(
std::move(std::get<2>(results)));
}
static std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
const Tensor& data,
const Tensor& batch_sizes,
c10::List<at::Tensor> hx_,

View File

@ -11,7 +11,6 @@
#include <ATen/ops/resize_as_native.h>
#include <ATen/ops/resize_native.h>
#include <ATen/ops/resize.h>
#include <ATen/ops/_resize_output.h>
#endif
namespace at { namespace native {

View File

@ -400,7 +400,7 @@ static void build_index_op(
iter.build(config);
}
static void check_indices_on_cpu_or_selfdevice(
void check_indices_on_cpu_or_selfdevice(
const Tensor& self,
const at::MaterializedIOptTensorListRef& indices) {
auto dev = self.device();
@ -965,7 +965,7 @@ TORCH_IMPL_FUNC(index_add_cpu_out)
}
}
static void index_reduce_func_impl(
void index_reduce_func_impl(
const Tensor& self,
int64_t dim,
const Tensor& index,
@ -1149,7 +1149,7 @@ static void check_indexarray_range(
}
}
static Tensor & index_select_out_cpu_dim1_(
Tensor & index_select_out_cpu_dim1_(
Tensor & result_contig, const Tensor & self, const Tensor & index_contig) {
auto self_contig = self.contiguous();
@ -1379,6 +1379,10 @@ Tensor index_select_quantized_cpu_(const Tensor & self, int64_t dim, const Tenso
return at::native::index_select_out_cpu_(self, dim, index, result);
}
Tensor index_select_backward(const Tensor& grad, at::IntArrayRef self_sizes, int64_t dim, const Tensor& index) {
return at::native::index_select_backward_symint(grad, c10::fromIntArrayRefSlow(self_sizes), dim, index);
}
Tensor index_select_backward_symint(const Tensor& grad, c10::SymIntArrayRef self_sizes, int64_t dim, const Tensor& index) {
// for composite compliance, use out-of-place variant of
// `index_add` if index tensor is a Tensor Subclass.
@ -1533,7 +1537,7 @@ static void scatter_reduce_exclude_self_helper(
});
}
static void _scatter_via_index_put(
void _scatter_via_index_put(
const Tensor& self,
int64_t dim,
const Tensor& index,

View File

@ -1009,7 +1009,7 @@ Tensor dense_to_sparse_bsc(const Tensor& self, IntArrayRef blocksize, c10::optio
return dense_to_sparse_compressed<Layout::SparseBsc>(self, blocksize, dense_dim_opt);
}
static void _check_blocksize_matches(
void _check_blocksize_matches(
const Tensor& self,
c10::optional<IntArrayRef> blocksize_opt,
const std::string& name) {
@ -1023,7 +1023,7 @@ static void _check_blocksize_matches(
}
}
static Tensor sparse_compressed_clone(
Tensor sparse_compressed_clone(
const Tensor& self,
c10::optional<IntArrayRef> blocksize,
const std::string& name) {
@ -1046,7 +1046,7 @@ static Tensor sparse_compressed_clone(
values.device());
}
static Tensor sparse_compressed_to_flipped(
Tensor sparse_compressed_to_flipped(
const Tensor& self,
c10::optional<IntArrayRef> blocksize,
const std::string& name) {

View File

@ -1,6 +1,5 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/native/Unfold3d.h>
#include <ATen/Config.h>
#include <ATen/Dispatch.h>
#include <ATen/Parallel.h>

View File

@ -10,8 +10,6 @@
#else
#include <ATen/ops/_weight_norm_differentiable_backward_native.h>
#include <ATen/ops/_weight_norm_interface.h>
#include <ATen/ops/_weight_norm_interface_backward_native.h>
#include <ATen/ops/_weight_norm_interface_native.h>
#include <ATen/ops/_weight_norm_native.h>
#include <ATen/ops/empty_strided.h>
#include <ATen/ops/norm_except_dim.h>

View File

@ -13,7 +13,7 @@ namespace at::native {
inline namespace CPU_CAPABILITY {
static void pow_tensor_tensor_kernel(TensorIteratorBase& iter) {
void pow_tensor_tensor_kernel(TensorIteratorBase& iter) {
const auto dtype = iter.common_dtype();
if (isFloatingType(dtype) || isComplexType(dtype)) {
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, dtype, "pow", [&]() {
@ -90,7 +90,7 @@ void reciprocal_kernel(TensorIteratorBase& iter);
void rsqrt_kernel(TensorIteratorBase& iter);
void sqrt_kernel(TensorIteratorBase& iter);
static void pow_tensor_scalar_kernel(
void pow_tensor_scalar_kernel(
TensorIteratorBase& iter,
const Scalar& exp_scalar) {
// prevent multiple calls to iter.common_dtype()

View File

@ -32,7 +32,6 @@ namespace mkl {
namespace {
#if AT_USE_MKL_SPARSE()
c10::MaybeOwned<Tensor> prepare_dense_matrix_for_mkl(
const Tensor& tensor) {
if (tensor.is_non_overlapping_and_dense() ||
@ -111,6 +110,7 @@ void inline col_indices_and_values_resize_(const Tensor& input, int64_t nnz) {
/*
Resizes `input` tensor and fills it with the data from MKL.
*/
#if AT_USE_MKL_SPARSE()
template <typename scalar_t>
void mkl_result_copy_(const Tensor& input, sparse_matrix_t mkl_desc) {
sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO;

View File

@ -6,7 +6,7 @@
namespace at {
namespace native {
static TensorOptions verify_empty_parameters(
TensorOptions verify_empty_parameters(
const at::Tensor& self,
c10::optional<ScalarType> dtype,
c10::optional<Layout> layout,

View File

@ -79,6 +79,64 @@ Tensor bmm_nested(const Tensor& self, const Tensor& mat2) {
return output;
}
// utilities support `matmul_nested`
namespace {
// Args:
// self_sizes: the sizes of `self` in `matmul_nested`
// mat2_sizes: the sizes of `mat2` in `matmul_nested`
// buffer_op: the options for new buffer
// sizemat_op: the options for new size matrix
// Returns:
// the batch size of each input underlying tensor, i.e. the product of batch-dimension sizes
// the empty output nested tensor
inline std::tuple<std::vector<int64_t>, Tensor>
matmul_nested_helper(
const std::vector<IntArrayRef>& self_sizes,
const std::vector<IntArrayRef>& mat2_sizes,
const c10::TensorOptions& buffer_op,
const c10::TensorOptions& sizemat_op) {
int64_t ntensors = self_sizes.size(),
ndims = self_sizes[0].size();
std::vector<int64_t> batch_sizes(ntensors, 1);
Tensor sizemat = at::empty({ntensors, ndims}, sizemat_op);
int64_t* sizemat_ptr = sizemat.mutable_data_ptr<int64_t>();
int64_t numel = 0;
for (int64_t i = 0; i < ntensors; i++) {
const IntArrayRef& self_size = self_sizes[i],
& mat2_size = mat2_sizes[i];
int64_t& batch_size = batch_sizes[i];
// batch dimensions
for (int64_t j = 0; j < ndims - 2; j++) {
const int64_t& self_sizej = self_size[j],
& mat2_sizej = mat2_size[j];
TORCH_CHECK(
self_sizej == mat2_sizej,
"matmul: For nested tensors, no broadcasting is currently performed: ",
i, "-th nested matrices in batch at dimension ", j + 1,
" have mismatching sizes ", self_sizej, " and ", mat2_sizej);
sizemat_ptr[j] = self_sizej;
batch_size *= sizemat_ptr[j];
}
// matrix multiplication dimensions
const int64_t& self_size0 = self_size[ndims - 2], & self_size1 = self_size[ndims - 1],
& mat2_size0 = mat2_size[ndims - 2], & mat2_size1 = mat2_size[ndims - 1];
TORCH_CHECK(
self_size1 == mat2_size0,
"matmul: ",
i, "-th nested matrices in batch cannot be multiplied (",
self_size0, "x", self_size1, " and ",
mat2_size0, "x", mat2_size1, ")");
sizemat_ptr[ndims - 2] = self_size0;
sizemat_ptr[ndims - 1] = mat2_size1;
sizemat_ptr += ndims;
numel += batch_size * self_size0 * mat2_size1;
}
Tensor buffer = at::empty(numel, buffer_op);
Tensor output = wrap_buffer(buffer, sizemat);
return std::make_tuple(batch_sizes, output);
}
}
Tensor matmul_with_bmm_nested(const Tensor& self, const Tensor& mat2) {
// Tensor self = self_.contiguous();
// Tensor mat2 = mat2_.contiguous();

View File

@ -128,7 +128,7 @@ Tensor fake_quantize_per_channel_affine_cachemask_backward(
return dY * mask;
}
static Tensor _get_rounded_zero_point(
Tensor _get_rounded_zero_point(
const Tensor& zero_point,
int64_t quant_min,
int64_t quant_max) {

View File

@ -133,7 +133,7 @@ Tensor fake_quantize_per_tensor_affine_cachemask_backward(
return dY * mask;
}
static int64_t _get_zero_point_from_tensor(
int64_t _get_zero_point_from_tensor(
const Tensor& zero_point,
int64_t quant_min,
int64_t quant_max,

View File

@ -285,7 +285,7 @@ std::tuple<double, int64_t> _choose_qparams_per_tensor(
return std::make_tuple(q_params.scale, q_params.zero_point);
}
static float calculate_quant_loss(
float calculate_quant_loss(
const float* input,
int numel,
float xmin,

View File

@ -171,6 +171,15 @@ Tensor mean_quantized_cpu(
return result;
}
Tensor mean_quantized_cpu(
const Tensor& self,
DimnameList dim,
bool keepdim,
optional<ScalarType> dtype) {
return mean_quantized_cpu(
self, dimnames_to_positions(self, dim), keepdim, dtype);
}
Tensor& mean_out_quantized_cpu(
Tensor& result,
const Tensor& self,

View File

@ -9,7 +9,7 @@ namespace native {
DEFINE_DISPATCH(qdropout_stub);
static Tensor quantized_dropout(
Tensor quantized_dropout(
const Tensor& qx, double output_scale, int64_t output_zero_point, const Scalar& p, bool training) {
return qx;
}

View File

@ -35,7 +35,7 @@ DEFINE_DISPATCH(qrelu_leaky_stub);
DEFINE_DISPATCH(qprelu_stub);
#ifdef USE_PYTORCH_QNNPACK
static Tensor qnnpack_relu(Tensor input) {
Tensor qnnpack_relu(Tensor input) {
Tensor qy;
TORCH_CHECK(
input.ndimension() > 0, "qnnpack_relu(): Got empty input tensor");

View File

@ -122,7 +122,7 @@ bool solve_arange(const Tensor& input, int64_t& start, int64_t& end, int64_t& st
formats with support to batched and dense dimensions.
*/
static void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_indices, const Tensor& plain_indices, const Tensor& values, const IntArrayRef size, const Layout& layout) {
void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_indices, const Tensor& plain_indices, const Tensor& values, const IntArrayRef size, const Layout& layout) {
// Layout must be Sparse Compressed, 2.4
AT_DISPATCH_ALL_SPARSE_COMPRESSED_LAYOUTS(layout, "validate_sparse_compressed_tensor_args", [&]{});
@ -321,7 +321,7 @@ void _validate_sparse_bsc_tensor_args(const Tensor& ccol_indices, const Tensor&
// of historical reasons (that ought to be removed in future) and does
// not mean that the corresponding functionality would be CSR layout
// only specific.
static SparseCsrTensor new_compressed_tensor(const TensorOptions& options) {
SparseCsrTensor new_compressed_tensor(const TensorOptions& options) {
// TODO: remove this comment after enabling autograd support for CSR tensor
// constructor.
// TORCH_INTERNAL_ASSERT(impl::variable_excluded_from_dispatch());
@ -401,7 +401,7 @@ SPARSE_COMPRESSED_TENSOR_UNSAFE(csc, kSparseCsc);
SPARSE_COMPRESSED_TENSOR_UNSAFE(bsr, kSparseBsr);
SPARSE_COMPRESSED_TENSOR_UNSAFE(bsc, kSparseBsc);
static DimVector _estimate_sparse_compressed_tensor_size(
DimVector _estimate_sparse_compressed_tensor_size(
const Tensor& compressed_indices,
const Tensor& plain_indices,
const Tensor& values,
@ -716,6 +716,12 @@ int64_t dense_dim_sparse_csr(const SparseCsrTensor& self) {
return get_sparse_csr_impl(self)->dense_dim();
}
bool _is_same_size_as_sparse_compressed(
const SparseCsrTensor& self,
const SparseCsrTensor& src) {
return self.sizes().equals(src.sizes());
}
const SparseCsrTensor& resize_as_sparse_compressed_(
const SparseCsrTensor& self,
const SparseCsrTensor& src) {

View File

@ -342,6 +342,16 @@ inline Tensor get_result_tensor_for_unary_op(F op, const Tensor& input) {
}
} // namespace
static constexpr bool is_mkl_supported() {
#ifdef _MSC_VER
return false;
#elif __APPLE__ || __MACH__
return false;
#else
return true;
#endif
}
// Only accept squares sparse matrices or dense input as a vector
// TODO: Check what happens with MKL, the output error reported with non square
// matrices tends to be high See:

View File

@ -78,6 +78,20 @@
namespace at::native {
using namespace at::sparse;
// --------------------------------------------------------------------
// Utility functions
// --------------------------------------------------------------------
namespace {
inline SparseTensor get_result_tensor_for_unary_op(const SparseTensor& input) {
if (c10::isIntegralType(input.scalar_type(), /*includeBool=*/true)) {
return at::empty_like(input, input.options().dtype(c10::get_default_dtype()));
}
return at::empty_like(input);
}
}
// --------------------------------------------------------------------
// zero_(SparseTensor)
// --------------------------------------------------------------------

View File

@ -16,7 +16,7 @@ nnapi_wrapper* nnapi;
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
nnapi_wrapper* check_nnapi;
static void load_platform_library() {
void load_platform_library() {
static int run_once = [](){
nnapi_wrapper_load(&nnapi, &check_nnapi);
CAFFE_ENFORCE(nnapi);

View File

@ -23,7 +23,7 @@
static int loaded = 0;
static struct nnapi_wrapper nnapi_;
static struct nnapi_wrapper check_nnapi_;
static int check__getDeviceCount(uint32_t* numDevices) {
int check__getDeviceCount(uint32_t* numDevices) {
CAFFE_ENFORCE(nnapi_._getDeviceCount);
int ret = nnapi_._getDeviceCount(numDevices);
// TODO: Maybe add better logging here.
@ -33,7 +33,7 @@ static int check__getDeviceCount(uint32_t* numDevices) {
);
return ret;
}
static int check__getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) {
int check__getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) {
CAFFE_ENFORCE(nnapi_._getDevice);
int ret = nnapi_._getDevice(devIndex,device);
// TODO: Maybe add better logging here.
@ -43,7 +43,7 @@ static int check__getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) {
);
return ret;
}
static int check_Device_getName(const ANeuralNetworksDevice* device, const char** name) {
int check_Device_getName(const ANeuralNetworksDevice* device, const char** name) {
CAFFE_ENFORCE(nnapi_.Device_getName);
int ret = nnapi_.Device_getName(device,name);
// TODO: Maybe add better logging here.
@ -53,7 +53,7 @@ static int check_Device_getName(const ANeuralNetworksDevice* device, const char*
);
return ret;
}
static int check_Device_getVersion(const ANeuralNetworksDevice* device, const char** version) {
int check_Device_getVersion(const ANeuralNetworksDevice* device, const char** version) {
CAFFE_ENFORCE(nnapi_.Device_getVersion);
int ret = nnapi_.Device_getVersion(device,version);
// TODO: Maybe add better logging here.
@ -63,7 +63,7 @@ static int check_Device_getVersion(const ANeuralNetworksDevice* device, const ch
);
return ret;
}
static int check_Device_getFeatureLevel(const ANeuralNetworksDevice* device, int64_t* featureLevel) {
int check_Device_getFeatureLevel(const ANeuralNetworksDevice* device, int64_t* featureLevel) {
CAFFE_ENFORCE(nnapi_.Device_getFeatureLevel);
int ret = nnapi_.Device_getFeatureLevel(device,featureLevel);
// TODO: Maybe add better logging here.
@ -73,7 +73,7 @@ static int check_Device_getFeatureLevel(const ANeuralNetworksDevice* device, int
);
return ret;
}
static int check_Model_getSupportedOperationsForDevices( const ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, bool* supportedOps) {
int check_Model_getSupportedOperationsForDevices( const ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, bool* supportedOps) {
CAFFE_ENFORCE(nnapi_.Model_getSupportedOperationsForDevices);
int ret = nnapi_.Model_getSupportedOperationsForDevices(model,devices,numDevices,supportedOps);
// TODO: Maybe add better logging here.
@ -83,7 +83,7 @@ static int check_Model_getSupportedOperationsForDevices( const ANeuralNetworksMo
);
return ret;
}
static int check_Compilation_createForDevices(ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) {
int check_Compilation_createForDevices(ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_createForDevices);
int ret = nnapi_.Compilation_createForDevices(model,devices,numDevices,compilation);
// TODO: Maybe add better logging here.
@ -93,7 +93,7 @@ static int check_Compilation_createForDevices(ANeuralNetworksModel* model, const
);
return ret;
}
static int check_Execution_compute(ANeuralNetworksExecution* execution) {
int check_Execution_compute(ANeuralNetworksExecution* execution) {
CAFFE_ENFORCE(nnapi_.Execution_compute);
int ret = nnapi_.Execution_compute(execution);
// TODO: Maybe add better logging here.
@ -103,7 +103,7 @@ static int check_Execution_compute(ANeuralNetworksExecution* execution) {
);
return ret;
}
static int check_Memory_createFromFd(size_t size, int protect, int fd, size_t offset, ANeuralNetworksMemory** memory) {
int check_Memory_createFromFd(size_t size, int protect, int fd, size_t offset, ANeuralNetworksMemory** memory) {
CAFFE_ENFORCE(nnapi_.Memory_createFromFd);
int ret = nnapi_.Memory_createFromFd(size,protect,fd,offset,memory);
// TODO: Maybe add better logging here.
@ -113,11 +113,11 @@ static int check_Memory_createFromFd(size_t size, int protect, int fd, size_t of
);
return ret;
}
static void check_Memory_free(ANeuralNetworksMemory* memory) {
void check_Memory_free(ANeuralNetworksMemory* memory) {
CAFFE_ENFORCE(nnapi_.Memory_free);
nnapi_.Memory_free(memory);
}
static int check_Model_create(ANeuralNetworksModel** model) {
int check_Model_create(ANeuralNetworksModel** model) {
CAFFE_ENFORCE(nnapi_.Model_create);
int ret = nnapi_.Model_create(model);
// TODO: Maybe add better logging here.
@ -127,11 +127,11 @@ static int check_Model_create(ANeuralNetworksModel** model) {
);
return ret;
}
static void check_Model_free(ANeuralNetworksModel* model) {
void check_Model_free(ANeuralNetworksModel* model) {
CAFFE_ENFORCE(nnapi_.Model_free);
nnapi_.Model_free(model);
}
static int check_Model_finish(ANeuralNetworksModel* model) {
int check_Model_finish(ANeuralNetworksModel* model) {
CAFFE_ENFORCE(nnapi_.Model_finish);
int ret = nnapi_.Model_finish(model);
// TODO: Maybe add better logging here.
@ -141,7 +141,7 @@ static int check_Model_finish(ANeuralNetworksModel* model) {
);
return ret;
}
static int check_Model_addOperand(ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
int check_Model_addOperand(ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
CAFFE_ENFORCE(nnapi_.Model_addOperand);
int ret = nnapi_.Model_addOperand(model,type);
// TODO: Maybe add better logging here.
@ -151,7 +151,7 @@ static int check_Model_addOperand(ANeuralNetworksModel* model, const ANeuralNetw
);
return ret;
}
static int check_Model_setOperandValue(ANeuralNetworksModel* model, int32_t index, const void* buffer, size_t length) {
int check_Model_setOperandValue(ANeuralNetworksModel* model, int32_t index, const void* buffer, size_t length) {
CAFFE_ENFORCE(nnapi_.Model_setOperandValue);
int ret = nnapi_.Model_setOperandValue(model,index,buffer,length);
// TODO: Maybe add better logging here.
@ -161,7 +161,7 @@ static int check_Model_setOperandValue(ANeuralNetworksModel* model, int32_t inde
);
return ret;
}
static int check_Model_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
int check_Model_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
CAFFE_ENFORCE(nnapi_.Model_setOperandValueFromMemory);
int ret = nnapi_.Model_setOperandValueFromMemory(model,index,memory,offset,length);
// TODO: Maybe add better logging here.
@ -171,7 +171,7 @@ static int check_Model_setOperandValueFromMemory(ANeuralNetworksModel* model, in
);
return ret;
}
static int check_Model_addOperation(ANeuralNetworksModel* model, ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
int check_Model_addOperation(ANeuralNetworksModel* model, ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
CAFFE_ENFORCE(nnapi_.Model_addOperation);
int ret = nnapi_.Model_addOperation(model,type,inputCount,inputs,outputCount,outputs);
// TODO: Maybe add better logging here.
@ -181,7 +181,7 @@ static int check_Model_addOperation(ANeuralNetworksModel* model, ANeuralNetworks
);
return ret;
}
static int check_Model_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
int check_Model_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
CAFFE_ENFORCE(nnapi_.Model_identifyInputsAndOutputs);
int ret = nnapi_.Model_identifyInputsAndOutputs(model,inputCount,inputs,outputCount,outputs);
// TODO: Maybe add better logging here.
@ -191,7 +191,7 @@ static int check_Model_identifyInputsAndOutputs(ANeuralNetworksModel* model, uin
);
return ret;
}
static int check_Model_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bool allow) {
int check_Model_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bool allow) {
CAFFE_ENFORCE(nnapi_.Model_relaxComputationFloat32toFloat16);
int ret = nnapi_.Model_relaxComputationFloat32toFloat16(model,allow);
// TODO: Maybe add better logging here.
@ -201,7 +201,7 @@ static int check_Model_relaxComputationFloat32toFloat16(ANeuralNetworksModel* mo
);
return ret;
}
static int check_Compilation_create(ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
int check_Compilation_create(ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_create);
int ret = nnapi_.Compilation_create(model,compilation);
// TODO: Maybe add better logging here.
@ -211,11 +211,11 @@ static int check_Compilation_create(ANeuralNetworksModel* model, ANeuralNetworks
);
return ret;
}
static void check_Compilation_free(ANeuralNetworksCompilation* compilation) {
void check_Compilation_free(ANeuralNetworksCompilation* compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_free);
nnapi_.Compilation_free(compilation);
}
static int check_Compilation_setPreference(ANeuralNetworksCompilation* compilation, int32_t preference) {
int check_Compilation_setPreference(ANeuralNetworksCompilation* compilation, int32_t preference) {
CAFFE_ENFORCE(nnapi_.Compilation_setPreference);
int ret = nnapi_.Compilation_setPreference(compilation,preference);
// TODO: Maybe add better logging here.
@ -225,7 +225,7 @@ static int check_Compilation_setPreference(ANeuralNetworksCompilation* compilati
);
return ret;
}
static int check_Compilation_finish(ANeuralNetworksCompilation* compilation) {
int check_Compilation_finish(ANeuralNetworksCompilation* compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_finish);
int ret = nnapi_.Compilation_finish(compilation);
// TODO: Maybe add better logging here.
@ -235,7 +235,7 @@ static int check_Compilation_finish(ANeuralNetworksCompilation* compilation) {
);
return ret;
}
static int check_Execution_create(ANeuralNetworksCompilation* compilation, ANeuralNetworksExecution** execution) {
int check_Execution_create(ANeuralNetworksCompilation* compilation, ANeuralNetworksExecution** execution) {
CAFFE_ENFORCE(nnapi_.Execution_create);
int ret = nnapi_.Execution_create(compilation,execution);
// TODO: Maybe add better logging here.
@ -245,11 +245,11 @@ static int check_Execution_create(ANeuralNetworksCompilation* compilation, ANeur
);
return ret;
}
static void check_Execution_free(ANeuralNetworksExecution* execution) {
void check_Execution_free(ANeuralNetworksExecution* execution) {
CAFFE_ENFORCE(nnapi_.Execution_free);
nnapi_.Execution_free(execution);
}
static int check_Execution_setInput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
int check_Execution_setInput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setInput);
int ret = nnapi_.Execution_setInput(execution,index,type,buffer,length);
// TODO: Maybe add better logging here.
@ -259,7 +259,7 @@ static int check_Execution_setInput(ANeuralNetworksExecution* execution, int32_t
);
return ret;
}
static int check_Execution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
int check_Execution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setInputFromMemory);
int ret = nnapi_.Execution_setInputFromMemory(execution,index,type,memory,offset,length);
// TODO: Maybe add better logging here.
@ -269,7 +269,7 @@ static int check_Execution_setInputFromMemory(ANeuralNetworksExecution* executio
);
return ret;
}
static int check_Execution_setOutput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
int check_Execution_setOutput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setOutput);
int ret = nnapi_.Execution_setOutput(execution,index,type,buffer,length);
// TODO: Maybe add better logging here.
@ -279,7 +279,7 @@ static int check_Execution_setOutput(ANeuralNetworksExecution* execution, int32_
);
return ret;
}
static int check_Execution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
int check_Execution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setOutputFromMemory);
int ret = nnapi_.Execution_setOutputFromMemory(execution,index,type,memory,offset,length);
// TODO: Maybe add better logging here.
@ -289,7 +289,7 @@ static int check_Execution_setOutputFromMemory(ANeuralNetworksExecution* executi
);
return ret;
}
static int check_Execution_startCompute(ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
int check_Execution_startCompute(ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
CAFFE_ENFORCE(nnapi_.Execution_startCompute);
int ret = nnapi_.Execution_startCompute(execution,event);
// TODO: Maybe add better logging here.
@ -299,7 +299,7 @@ static int check_Execution_startCompute(ANeuralNetworksExecution* execution, ANe
);
return ret;
}
static int check_Event_wait(ANeuralNetworksEvent* event) {
int check_Event_wait(ANeuralNetworksEvent* event) {
CAFFE_ENFORCE(nnapi_.Event_wait);
int ret = nnapi_.Event_wait(event);
// TODO: Maybe add better logging here.
@ -309,11 +309,11 @@ static int check_Event_wait(ANeuralNetworksEvent* event) {
);
return ret;
}
static void check_Event_free(ANeuralNetworksEvent* event) {
void check_Event_free(ANeuralNetworksEvent* event) {
CAFFE_ENFORCE(nnapi_.Event_free);
nnapi_.Event_free(event);
}
static int check_Execution_getOutputOperandRank(ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank) {
int check_Execution_getOutputOperandRank(ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank) {
CAFFE_ENFORCE(nnapi_.Execution_getOutputOperandRank);
int ret = nnapi_.Execution_getOutputOperandRank(execution,index,rank);
// TODO: Maybe add better logging here.
@ -323,7 +323,7 @@ static int check_Execution_getOutputOperandRank(ANeuralNetworksExecution* execut
);
return ret;
}
static int check_Execution_getOutputOperandDimensions(ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions) {
int check_Execution_getOutputOperandDimensions(ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions) {
CAFFE_ENFORCE(nnapi_.Execution_getOutputOperandDimensions);
int ret = nnapi_.Execution_getOutputOperandDimensions(execution,index,dimensions);
// TODO: Maybe add better logging here.

View File

@ -83,7 +83,7 @@ QTensorImpl* get_qtensorimpl(const TensorBase& self) {
return static_cast<QTensorImpl*>(self.unsafeGetTensorImpl());
}
static int64_t get_sub_byte_tensor_size(IntArrayRef sizes, size_t dtype_itemsize, at::ScalarType t) {
int64_t get_sub_byte_tensor_size(IntArrayRef sizes, size_t dtype_itemsize, at::ScalarType t) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t element_per_byte;
switch(t) {
@ -178,7 +178,7 @@ Tensor PerTensorAffineQuantizer::quantize(const Tensor& rtensor) {
return qtensor;
}
static void per_tensor_affine_dequantize_impl(
void per_tensor_affine_dequantize_impl(
Tensor& rtensor,
const Tensor& qtensor,
const double scale,
@ -228,7 +228,7 @@ Tensor PerChannelAffineQuantizer::quantize(const Tensor& rtensor) {
return qtensor;
}
static void per_channel_affine_dequantize_impl(
void per_channel_affine_dequantize_impl(
Tensor& rtensor,
const Tensor& qtensor,
const Tensor& scale,
@ -278,7 +278,7 @@ Tensor PerChannelAffineFloatQParamsQuantizer::quantize(const Tensor& rtensor) {
return qtensor;
}
static void per_channel_affine_float_q_params_dequantize_impl(
void per_channel_affine_float_q_params_dequantize_impl(
Tensor& rtensor,
const Tensor& qtensor,
const Tensor& scale,

View File

@ -22,9 +22,6 @@ class VulkanImplRegistrar {
};
at::Tensor& vulkan_copy_(at::Tensor& self, const at::Tensor& src);
namespace native {
bool is_vulkan_available();
}// namespace native
} // namespace vulkan
} // namespace at

View File

@ -17,7 +17,7 @@
namespace F = torch::nn::functional;
static F::PadFuncOptions::mode_t _get_pad_mode_from_conv_padding_mode(
F::PadFuncOptions::mode_t _get_pad_mode_from_conv_padding_mode(
torch::nn::detail::conv_padding_mode_t conv_padding_mode) {
F::PadFuncOptions::mode_t pad_mode;
if (c10::get_if<torch::enumtype::kReflect>(&conv_padding_mode)) {

View File

@ -28,7 +28,7 @@ namespace nn {
/// https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnRNNMode_t
enum class CuDNNMode { RNN_RELU = 0, RNN_TANH = 1, LSTM = 2, GRU = 3 };
static CuDNNMode get_cudnn_mode_for_rnn(
CuDNNMode get_cudnn_mode_for_rnn(
detail::RNNOptionsBase::rnn_options_base_mode_t mode) {
if (c10::get_if<enumtype::kRNN_RELU>(&mode)) {
return CuDNNMode::RNN_RELU;
@ -43,7 +43,7 @@ static CuDNNMode get_cudnn_mode_for_rnn(
}
}
static Tensor apply_permutation(
Tensor apply_permutation(
const Tensor& tensor,
const Tensor& permutation,
int64_t dim = 1) {
@ -397,8 +397,8 @@ template class RNNImplBase<RNNImpl>;
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RNN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
static detail::RNNOptionsBase::rnn_options_base_mode_t
compute_rnn_options_base_mode(RNNOptions::nonlinearity_t nonlinearity) {
detail::RNNOptionsBase::rnn_options_base_mode_t compute_rnn_options_base_mode(
RNNOptions::nonlinearity_t nonlinearity) {
if (c10::get_if<enumtype::kTanh>(&nonlinearity)) {
return torch::kRNN_TANH;
} else if (c10::get_if<enumtype::kReLU>(&nonlinearity)) {

View File

@ -187,7 +187,7 @@ std::tuple<double, Tensor> LBFGS::_directional_evaluate(
return std::make_tuple(loss, flat_grad);
}
static double _cubic_interpolate(
double _cubic_interpolate(
double x1,
double f1,
double g1,
@ -236,7 +236,7 @@ using Function = std::function<std::tuple<double, Tensor>(
const std::vector<Tensor>& x,
double t,
const Tensor& d)>;
static std::tuple<double, Tensor, double, int64_t> _strong_wolfe(
std::tuple<double, Tensor, double, int64_t> _strong_wolfe(
const Function& obj_func,
const std::vector<Tensor>& x,
double t,

View File

@ -13,7 +13,7 @@
namespace c10d {
static ProcessGroup::BackendType strToBackendType(std::string backend) {
ProcessGroup::BackendType strToBackendType(std::string backend) {
if (backend == "undefined") {
return ProcessGroup::BackendType::UNDEFINED;
} else if (backend == "gloo") {
@ -29,7 +29,7 @@ static ProcessGroup::BackendType strToBackendType(std::string backend) {
}
}
static std::string backendTypeToStr(ProcessGroup::BackendType backendType) {
std::string backendTypeToStr(ProcessGroup::BackendType backendType) {
switch (backendType) {
case ProcessGroup::BackendType::UNDEFINED:
return "undefined";

View File

@ -2596,7 +2596,7 @@ c10::intrusive_ptr<Work> ProcessGroupGloo::alltoall_base(
return work;
}
static at::Tensor& checkSingleTensor(std::vector<at::Tensor>& tensors) {
at::Tensor& checkSingleTensor(std::vector<at::Tensor>& tensors) {
if (tensors.size() != 1) {
TORCH_CHECK(false, "ProcessGroupGloo::send takes a single tensor");
}
@ -2610,7 +2610,7 @@ static at::Tensor& checkSingleTensor(std::vector<at::Tensor>& tensors) {
return tensor;
}
static uint32_t checkTag(int32_t tag) {
uint32_t checkTag(int32_t tag) {
TORCH_CHECK(tag >= 0, "Tag must be nonnegative");
return (uint32_t)tag;
}

View File

@ -9,7 +9,7 @@ namespace quantization {
// TODO: The kernels are copied from fbgemm_gpu, we should dedup them later
static void FloatToBFloat16Quantized_ref(
void FloatToBFloat16Quantized_ref(
const float* const input,
const size_t nrows,
const size_t ncols,
@ -26,7 +26,7 @@ static void FloatToBFloat16Quantized_ref(
}
}
static void BFloat16QuantizedToFloat_ref(
void BFloat16QuantizedToFloat_ref(
const at::BFloat16* const input,
const size_t nrows,
const size_t ncols,

View File

@ -41,7 +41,7 @@ std::unordered_map<std::string, worker_id_t> collectNames(
return nameToId;
}
static std::vector<std::string> splitString(
std::vector<std::string> splitString(
const std::string& s,
const std::string& delim) {
std::vector<std::string> tokens;
@ -154,7 +154,7 @@ const string storeKeyActiveCallCount = "ACTIVE_CALLS";
const string storeKeyReady = "READY";
static std::atomic<int> barrierId(0);
static std::tuple<std::string, std::string, std::string> getNextKeyIds() {
std::tuple<std::string, std::string, std::string> getNextKeyIds() {
barrierId++;
std::string processCountKey =
fmt::format("{}{}{}", storeKeyProcessCount, storeKeyBarrierId, barrierId);

View File

@ -7,7 +7,7 @@ namespace torch {
namespace distributed {
namespace rpc {
static std::string fromVecToString(const std::vector<char>& vec) {
std::string fromVecToString(const std::vector<char>& vec) {
return std::string(vec.begin(), vec.end());
}

View File

@ -1,25 +1,25 @@
#include <c10/macros/Export.h>
#include <ittnotify.h>
#include <torch/csrc/itt_wrapper.h>
#include <torch/csrc/profiler/stubs/base.h>
namespace torch {
namespace profiler {
__itt_domain* _itt_domain = __itt_domain_create("PyTorch");
bool itt_is_available() {
TORCH_API bool itt_is_available() {
return torch::profiler::impl::ittStubs()->enabled();
}
void itt_range_push(const char* msg) {
TORCH_API void itt_range_push(const char* msg) {
__itt_string_handle* hsMsg = __itt_string_handle_create(msg);
__itt_task_begin(_itt_domain, __itt_null, __itt_null, hsMsg);
}
void itt_range_pop() {
TORCH_API void itt_range_pop() {
__itt_task_end(_itt_domain);
}
void itt_mark(const char* msg) {
TORCH_API void itt_mark(const char* msg) {
__itt_string_handle* hsMsg = __itt_string_handle_create(msg);
__itt_task_begin(_itt_domain, __itt_null, __itt_null, hsMsg);
__itt_task_end(_itt_domain);

View File

@ -1,13 +1,12 @@
#ifndef PROFILER_ITT_H
#define PROFILER_ITT_H
#include <c10/macros/Export.h>
namespace torch {
namespace profiler {
TORCH_API bool itt_is_available();
TORCH_API void itt_range_push(const char* msg);
TORCH_API void itt_range_pop();
TORCH_API void itt_mark(const char* msg);
bool itt_is_available();
void itt_range_push(const char* msg);
void itt_range_pop();
void itt_mark(const char* msg);
} // namespace profiler
} // namespace torch

View File

@ -55,7 +55,7 @@ T& toGraphFunctionImpl(F& function) {
} // namespace
static void placeholderCreator(GraphFunction&) {
void placeholderCreator(GraphFunction&) {
throw RecursiveMethodCallError();
}

View File

@ -163,7 +163,7 @@ void Module::to(at::Device device, bool non_blocking) {
to_impl(device, /*dtype=*/c10::nullopt, non_blocking);
}
static void module_state_to(
void module_state_to(
const autograd::Variable& variable,
const c10::optional<at::Device>& device,
const c10::optional<at::ScalarType>& dtype,

View File

@ -53,8 +53,7 @@ bool hasFusionBackend(at::Device::Type backend_type) {
return getFusionBackends().count(backend_type);
}
static const FusedKernelConstructor& getConstructor(
at::Device::Type backend_type) {
const FusedKernelConstructor& getConstructor(at::Device::Type backend_type) {
std::lock_guard<std::mutex> guard(fusionBackendLock());
return getFusionBackends().at(backend_type);
}

View File

@ -190,7 +190,7 @@ static void compressContiguous(
// Launches the requested fusion on the given device with the given inputs.
// Output pointers are stored in outputs (to be put on the stack later).
static void launchFusion(
void launchFusion(
const FusedKernel& fusion,
const at::Device device,
const at::ArrayRef<at::Tensor>& inputs,

View File

@ -12,7 +12,7 @@ namespace torch::jit {
// Transforms a Loop that has both a trip count specified and a loop
// body condition so that the iter count is no longer specified
// and it is recognizable as a python while loop.
static void canonicalizeModifiedLoop(Node* n) {
void canonicalizeModifiedLoop(Node* n) {
LoopView loop(n);
if (loop.loopType() != LoopView::ModifiedLoop) {
return;
@ -48,7 +48,7 @@ static void canonicalizeModifiedLoop(Node* n) {
loop.bodyBlock()->insertOutput(0, new_condition);
}
static void canonicalizeModifiedLoops(Block* block) {
void canonicalizeModifiedLoops(Block* block) {
for (Node* n : block->nodes()) {
for (Block* b : n->blocks()) {
canonicalizeModifiedLoops(b);

View File

@ -522,7 +522,7 @@ struct ExitTransformer {
std::shared_ptr<Graph> graph_;
};
static bool inlineConsecutiveIfs(Node* node) {
bool inlineConsecutiveIfs(Node* node) {
if (node->kind() != prim::If || node->next()->kind() != prim::If) {
return false;
}
@ -605,7 +605,7 @@ static bool inlineConsecutiveIfs(Node* node) {
// return 1
// else:
// return 2
static void inlineConsecutiveIfs(Block* block) {
void inlineConsecutiveIfs(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end();
it != end;) {
for (Block* b : it->blocks()) {

View File

@ -30,7 +30,7 @@ void InlineBlockBeforeNode(Node* before_node, Block* block) {
// <body>
// BlockExit(continue_condition, loop_carried_block*)
// }
static void inlineLoopCondition(Node* n) {
void inlineLoopCondition(Node* n) {
Block* body_block = n->blocks().at(0);
auto pre_header = n->blocks().at(1);
@ -45,7 +45,7 @@ static void inlineLoopCondition(Node* n) {
n->eraseBlock(1);
}
static void inlineLoopCondition(Block* block) {
void inlineLoopCondition(Block* block) {
for (Node* n : block->nodes()) {
for (Block* b : n->blocks()) {
inlineLoopCondition(b);

View File

@ -187,7 +187,7 @@ struct CondValue {
};
enum NoneStatus { ALWAYS, MAYBE, NEVER };
static NoneStatus canBeNone(Value* v) {
NoneStatus canBeNone(Value* v) {
if (v->node()->mustBeNone()) {
return ALWAYS;
}
@ -5605,7 +5605,7 @@ std::vector<Function*> CompilationUnit::define(
self);
}
static void eraseListLiterals(std::shared_ptr<Graph>& graph) {
void eraseListLiterals(std::shared_ptr<Graph>& graph) {
DepthFirstGraphNodeIterator it(graph);
for (auto next_node = it.next(); next_node != nullptr;) {

View File

@ -548,6 +548,17 @@ MatchedSchema matchSchema(
throw ErrorReport(loc) << failure_messages.str();
}
MatchedSchema matchSchema(
const ::c10::FunctionSchema& schema,
const SourceRange& loc,
Graph& graph,
at::ArrayRef<Value*> args,
at::ArrayRef<NamedValue> kwargs) {
std::vector<NamedValue> named_args =
fmap(args, [](Value* v) { return NamedValue(v); });
return matchSchema(schema, loc, graph, named_args, kwargs);
}
static std::string prefixLine(
const std::string& str,
const std::string& prefix) {

View File

@ -110,7 +110,7 @@ void TracingState::delValue(const IValue& var) {
Value* getValueTrace(const IValue& var) {
return getTracingState()->getValue(var);
}
static Value* getOptTensorValueTrace(const c10::optional<at::Tensor>& var) {
Value* getOptTensorValueTrace(const c10::optional<at::Tensor>& var) {
return getValueTrace(IValue(var));
}
Value* TracingState::getValue(const IValue& var) {
@ -783,6 +783,19 @@ void addInputs(
n->addInput(list_node->output());
}
void addInputs(
Node* n,
const char* name,
c10::optional<caffe2::TypeMeta> opt_dtype) {
if (opt_dtype.has_value()) {
return addInputs(n, name, at::typeMetaToScalarType(*opt_dtype));
} else {
Graph* g = n->owningGraph();
Value* none = g->insertNode(g->createNone())->output();
n->addInput(none);
}
}
void addInputs(Node* n, const char* name, at::IntArrayRef value) {
using ArgumentStash = jit::tracer::ArgumentStash;
std::vector<Value*> info = ArgumentStash::hasIntArrayRef(name)
@ -1049,7 +1062,7 @@ void ArgumentStash::stashValue(
// Stack trace recording
////////////////////////////////////////////////////////////////////////////////
// no python present so we just do not record source information
static void defaultRecordSourceLocation(Node* n) {}
void defaultRecordSourceLocation(Node* n) {}
std::atomic<decltype(&defaultRecordSourceLocation)> record_source_location(
defaultRecordSourceLocation);
void recordSourceLocation(Node* n) {
@ -1059,7 +1072,7 @@ void setRecordSourceLocation(void (*v)(Node*)) {
record_source_location.store(v);
}
static std::vector<StackEntry> defaultPythonCallstack() {
std::vector<StackEntry> defaultPythonCallstack() {
return std::vector<StackEntry>();
}
std::atomic<decltype(&defaultPythonCallstack)> python_callstack_fn(
@ -1071,7 +1084,7 @@ void setPythonCallstack(std::vector<StackEntry> (*v)()) {
python_callstack_fn.store(v);
}
static void defaultWarn(const std::string& str) {
void defaultWarn(const std::string& str) {
TORCH_WARN(str);
}
std::atomic<warn_fn_type> warn_callback{defaultWarn};

View File

@ -8,13 +8,13 @@
namespace torch::jit {
static bool insertableTensor(const at::Tensor& ten) {
bool insertableTensor(const at::Tensor& ten) {
// bail if tensor has no storage i.e. opaque tensor used in MKLdnn.
// or gradients because we have no way of serializing them & are mutable
return !ten.requires_grad() && ten.has_storage() && !ten.is_nested();
}
static bool insertableIValue(const IValue& ivalue) {
bool insertableIValue(const IValue& ivalue) {
if (ivalue.isInt() || ivalue.isNone() || ivalue.isBool() ||
ivalue.isDouble() || ivalue.isComplexDouble() || ivalue.isString() ||
ivalue.isDevice() || ivalue.isEnum()) {

View File

@ -122,15 +122,13 @@ static std::ostream& printValueRefs(
// Can't make these two overloads directly a template, it'll be ambiguous with
// the global printer for operator<<.
static std::ostream& operator<<(
std::ostream& operator<<(
std::ostream& out,
const at::ArrayRef<const Value*> nodes) {
return printValueRefs(out, nodes);
}
static std::ostream& operator<<(
std::ostream& out,
const at::ArrayRef<Value*> nodes) {
std::ostream& operator<<(std::ostream& out, const at::ArrayRef<Value*> nodes) {
return printValueRefs(out, nodes);
}
@ -143,7 +141,7 @@ struct const_value_list_with_types {
: values(values), delim(std::move(delim_)) {}
};
static std::ostream& operator<<(
std::ostream& operator<<(
std::ostream& out,
const const_value_list_with_types& l) {
size_t i = 0;
@ -969,7 +967,7 @@ void Value::replaceAllUsesDominatedByNodeWith(
uses_.end());
}
static size_t findArgument(
size_t findArgument(
const FunctionSchema& the_schema,
const std::string& unqualName) {
for (const auto i : c10::irange(the_schema.arguments().size())) {
@ -982,7 +980,7 @@ static size_t findArgument(
std::string("Couldn't find an argument called ") + unqualName);
}
static size_t findArgument(const FunctionSchema& the_schema, Symbol name) {
size_t findArgument(const FunctionSchema& the_schema, Symbol name) {
const auto unqualName = name.toUnqualString();
return findArgument(the_schema, unqualName);
}
@ -2049,7 +2047,7 @@ void inlineCallStackOfNode(
Node* to_replace,
c10::optional<ModuleInstanceInfo> m_info);
static void inlineCallStackOfBlock(
void inlineCallStackOfBlock(
Block* b,
std::unordered_map<InlinedCallStack*, InlinedCallStackPtr>& new_cs_entries,
Function* callee,

View File

@ -14,7 +14,7 @@
namespace torch {
namespace jit {
static std::unordered_map<std::string, int64_t>& passes_to_current_counter() {
std::unordered_map<std::string, int64_t>& passes_to_current_counter() {
static std::unordered_map<std::string, int64_t> passes_to_current_counter;
return passes_to_current_counter;
}

View File

@ -95,7 +95,7 @@ uint64_t _get_model_bytecode_version(
return _get_model_bytecode_version_from_bytes(data.get(), size);
}
static uint64_t _get_model_bytecode_version_zip(
uint64_t _get_model_bytecode_version_zip(
std::shared_ptr<ReadAdapterInterface> rai) {
if (!check_zip_file(rai)) {
TORCH_CHECK(

View File

@ -238,7 +238,7 @@ std::map<std::string, at::Tensor> mobile_module_to_parameter_map(
"' in deserialized mobile::Module");
}
static std::map<std::string, at::Tensor> _load_parameters_bytes(
std::map<std::string, at::Tensor> _load_parameters_bytes(
std::shared_ptr<char> data,
size_t size,
c10::optional<at::Device> device) {

View File

@ -316,7 +316,7 @@ c10::IValue Method::operator()(std::vector<c10::IValue> stack) const {
return stack.front();
}
static c10::optional<std::string> print_type(const c10::Type& t) {
c10::optional<std::string> print_type(const c10::Type& t) {
auto namedType = t.cast<c10::NamedType>();
if (namedType && namedType->name()) {
return namedType->name().value().qualifiedName();

View File

@ -4,8 +4,7 @@ namespace torch {
namespace jit {
namespace mobile {
static std::unordered_map<std::string, std::function<void(Stack&)>>&
primOpsFnTable() {
std::unordered_map<std::string, std::function<void(Stack&)>>& primOpsFnTable() {
static std::unordered_map<std::string, std::function<void(Stack&)>>
prim_ops_fn;
return prim_ops_fn;

View File

@ -21,7 +21,6 @@ class SGDParamState {
return std::make_unique<SGDParamState>(
static_cast<const SGDParamState&>(*this));
}
friend bool operator==(const SGDParamState& lhs, const SGDParamState& rhs);
~SGDParamState() = default;
};

View File

@ -5,7 +5,7 @@
namespace torch {
namespace jit {
static void AnnotateWarns(Block* b) {
void AnnotateWarns(Block* b) {
static std::atomic<int64_t> idx(0);
for (Node* n : b->nodes()) {
for (Block* child_b : n->blocks()) {

View File

@ -83,7 +83,7 @@ c10::AliasAnalysisKind aliasAnalysisIsSpecialCase() {
// Tunable parameter. Set to something larger if it turns out to be better.
static constexpr size_t min_fusion_size = 4;
static bool have_same_shape(at::TensorList inputs) {
bool have_same_shape(at::TensorList inputs) {
auto expected_sizes = inputs[0].sizes();
return (std::all_of(
inputs.begin(), inputs.end(), [expected_sizes](const at::Tensor& t) {
@ -91,19 +91,17 @@ static bool have_same_shape(at::TensorList inputs) {
}));
}
static bool should_be_transposed(at::TensorList inputs) {
bool should_be_transposed(at::TensorList inputs) {
return (std::all_of(inputs.begin(), inputs.end(), [](const at::Tensor& t) {
return t.stride(0) == 1 && t.stride(1) == t.size(0);
}));
}
static std::vector<at::Tensor> transpose_inputs(at::TensorList inputs) {
std::vector<at::Tensor> transpose_inputs(at::TensorList inputs) {
return fmap(inputs, [](const at::Tensor& i) { return i.t(); });
}
static bool shape_is_fast_for_reduce(
const at::Tensor& lhs,
const at::Tensor& rhs) {
bool shape_is_fast_for_reduce(const at::Tensor& lhs, const at::Tensor& rhs) {
size_t l = lhs.size(0);
size_t m = lhs.size(1);
size_t r = rhs.size(1);
@ -253,7 +251,7 @@ struct TreeToken {
enum class Side { LHS, RHS };
static void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
auto graph = block->owningGraph();
// Look for trees in the block
@ -318,7 +316,7 @@ static void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
}
}
static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
// Cutoff chosed by benchmarking on a TITAN V
return other_side_input.numel() <= 1024 * 2048;
}
@ -370,7 +368,7 @@ RegisterOperators mm_batch_side_reg({Operator(
},
aliasAnalysisIsSpecialCase())});
static std::pair<std::vector<Node*>, std::vector<Node*>> gatherIndependentMMUses(
std::pair<std::vector<Node*>, std::vector<Node*>> gatherIndependentMMUses(
Value* value,
AliasDb& alias_db) {
const auto postprocess = [&](std::vector<Node*> mms) {
@ -415,7 +413,7 @@ static std::pair<std::vector<Node*>, std::vector<Node*>> gatherIndependentMMUses
postprocess(std::move(lhses)), postprocess(std::move(rhses)));
}
static void BatchMMSide(Block* block, AliasDb& alias_db) {
void BatchMMSide(Block* block, AliasDb& alias_db) {
// NB: 8 is the current loop unrolling factor
static constexpr size_t how_many_is_many = 8;
const auto batch_side = [&](std::vector<Node*>& mms, Side side) {
@ -464,7 +462,7 @@ static void BatchMMSide(Block* block, AliasDb& alias_db) {
}
}
static bool hasMutableOperators(Block* block) {
bool hasMutableOperators(Block* block) {
for (auto n : block->nodes()) {
if (n->kind().is_aten() && n->schema().is_mutable())
return true;
@ -476,7 +474,7 @@ static bool hasMutableOperators(Block* block) {
return false;
}
static bool hasMMOperators(std::shared_ptr<Graph>& graph) {
bool hasMMOperators(std::shared_ptr<Graph>& graph) {
DepthFirstGraphNodeIterator it(graph);
Node* n = nullptr;
while ((n = it.next()) != nullptr) {

View File

@ -51,7 +51,7 @@ std::shared_ptr<Graph> Canonicalize(
}
// Which index in b's owning Node is b
static size_t blockIndex(const Block* b) {
size_t blockIndex(const Block* b) {
auto n = b->owningNode();
AT_ASSERT(n);
for (size_t i = 0; i < n->blocks().size(); ++i) {
@ -73,7 +73,7 @@ static size_t blockIndex(const Block* b) {
* NB: this is not a topological index. Topologically, two nodes in
* different blocks of an if node are not topologically < or > each other.
*/
static bool isBefore(Node* n1, Node* n2) {
bool isBefore(Node* n1, Node* n2) {
// Invalid to call with the same node as both args
AT_ASSERT(n1 != n2);
@ -122,7 +122,7 @@ static bool isBefore(Node* n1, Node* n2) {
}
}
static bool isBefore(const Use& a, const Use& b) {
bool isBefore(const Use& a, const Use& b) {
// If two uses are the same node, we order on offset
if (a.user == b.user) {
return a.offset < b.offset;
@ -131,7 +131,7 @@ static bool isBefore(const Use& a, const Use& b) {
return isBefore(a.user, b.user);
}
static bool isAfter(const Use& a, const Use& b) {
bool isAfter(const Use& a, const Use& b) {
if (a.user == b.user && a.offset == b.offset) {
return false;
}
@ -157,14 +157,14 @@ c10::optional<const Use> firstOrLastUse(Value* v, bool find_first) {
return extreme_use;
}
static std::vector<c10::optional<const Use>> gatherFirstUses(
std::vector<c10::optional<const Use>> gatherFirstUses(
at::ArrayRef<Value*> values) {
return fmap(values, [&](Value* v) -> c10::optional<const Use> {
return firstOrLastUse(v, true);
});
}
static std::vector<size_t> sort_indexes(at::ArrayRef<Value*> values) {
std::vector<size_t> sort_indexes(at::ArrayRef<Value*> values) {
// initialize original index locations
std::vector<size_t> idx(values.size());
std::iota(idx.begin(), idx.end(), 0);
@ -194,17 +194,17 @@ static std::vector<size_t> sort_indexes(at::ArrayRef<Value*> values) {
return idx;
}
static void CanonicalizeLoopOutputs(Node* n) {
void CanonicalizeLoopOutputs(Node* n) {
auto new_indices = sort_indexes(n->outputs());
LoopView(n).permuteLoopCarried(new_indices);
}
static void CanonicalizeIfOutputs(Node* n) {
void CanonicalizeIfOutputs(Node* n) {
auto new_indices = sort_indexes(n->outputs());
IfView(n).permuteOutputs(new_indices);
}
static void CanonicalizeOutputs(Block* block) {
void CanonicalizeOutputs(Block* block) {
// We iterate in reverse since ordering of a node's outputs is dependent on
// the value use following it in the graph
for (Node* n : block->nodes().reverse()) {

View File

@ -22,12 +22,12 @@ bool isStrictFusion(Value* value) {
} // namespace
static bool fusionGuardCheck(Symbol k) {
bool fusionGuardCheck(Symbol k) {
return k == Symbol::prim("TensorExprDynamicGuard") || k == prim::TypeCheck ||
k == prim::CudaFusionGuard || k == prim::RequiresGradCheck;
}
static std::unordered_set<Node*> collectValuesUsedInGuard(
std::unordered_set<Node*> collectValuesUsedInGuard(
Node* guarding_if,
Node* enter_node) {
// DFS to collect
@ -58,7 +58,7 @@ static std::unordered_set<Node*> collectValuesUsedInGuard(
return visited_nodes;
}
static void checkForUnfusedOps(Node* enter_node) {
void checkForUnfusedOps(Node* enter_node) {
std::vector<Node*> unsupported_nodes;
std::vector<Node*> guarding_ifs; // if multiple, we will throw
for (Node* node = enter_node->next(); node->kind() != prim::Exit;

View File

@ -5,7 +5,7 @@
namespace torch {
namespace jit {
static void clearUndefinedness(Value* o) {
void clearUndefinedness(Value* o) {
if (o->type()->kind() == TensorType::Kind) {
o->setType(TensorType::get());
} else if (
@ -16,7 +16,7 @@ static void clearUndefinedness(Value* o) {
}
}
static void clearUndefinedness(Block* block) {
void clearUndefinedness(Block* block) {
for (auto n : block->nodes()) {
for (auto o : n->outputs()) {
clearUndefinedness(o);

View File

@ -22,7 +22,7 @@ c10::AliasAnalysisKind aliasAnalysisFromSchema() {
// helper to determine if an optional tensor argument/value passed in is
// statically defined (neither a None constant nor a Optional[Tensor] type)
// return yes, no, or no value if we can't tell
static c10::optional<bool> isDefined(Value* tensor) {
c10::optional<bool> isDefined(Value* tensor) {
if (tensor->type()->isSubtypeOf(*TensorType::get())) {
return true;
}
@ -32,7 +32,7 @@ static c10::optional<bool> isDefined(Value* tensor) {
return {};
}
static bool isDecomposableNorm(Node* normalize_op) {
bool isDecomposableNorm(Node* normalize_op) {
static const OperatorSet decomposable_normalization_ops = {
"aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor",
"aten::layer_norm(Tensor input, int[] normalized_shape, Tensor? weight, Tensor? bias, float eps, bool cudnn_enable) -> Tensor",
@ -85,7 +85,7 @@ RegisterOperators reg_ops(
},
aliasAnalysisFromSchema())});
static bool DecomposeOps(Block* block, CompilationUnit& decompose_funcs) {
bool DecomposeOps(Block* block, CompilationUnit& decompose_funcs) {
bool decomposed = false;
for (auto it = block->nodes().begin(), end = block->nodes().end(); it != end;
++it) {

View File

@ -9,7 +9,7 @@
namespace torch {
namespace jit {
static void SetNumTypeToTensorType(Value* v) {
void SetNumTypeToTensorType(Value* v) {
if (v->type()->isSubtypeOf(*NumberType::get())) {
v->setType(TensorType::fromNumberType(*v->type()));
} else if (v->type()->isSubtypeOf(*BoolType::get())) {

View File

@ -34,7 +34,7 @@ c10::optional<IValue> getIValue(
return toIValue(getValue(name, match_vmap, vmap));
}
static std::unordered_map<std::string, c10::IValue> getConvParams(
std::unordered_map<std::string, c10::IValue> getConvParams(
const Match& match,
const std::unordered_map<std::string, Value*>& vmap) {
std::unordered_map<std::string, c10::IValue> calc_values;

View File

@ -36,7 +36,7 @@ namespace jit {
// %n =
// prim::GetAttr[name="{prefix}.name1{...}.name(n-1)._packed_params"][%self]
//
static void hoistConvPackedParams(
void hoistConvPackedParams(
Module& rootModule,
Node* getConvPackedParamsNode,
const std::string& prefix,

View File

@ -4,7 +4,7 @@
namespace torch {
namespace jit {
static void InlineForkWait(
void InlineForkWait(
Block* b,
std::unordered_map<Value*, Value*>& future_remap) {
auto nodes = b->nodes();

View File

@ -16,7 +16,7 @@ namespace jit {
// subgraph, replace the context unpacking value with the new graph input.
// fork(foo) ->
// def foo(a, b):
static void inlineForkedClosure(Node* fork_closure, NodeKind genKind) {
void inlineForkedClosure(Node* fork_closure, NodeKind genKind) {
Node* function_context_node = fork_closure->input()->node();
if (function_context_node->inputs().size() != 2 ||
@ -58,7 +58,7 @@ static void inlineForkedClosure(Node* fork_closure, NodeKind genKind) {
runCleanupPasses(fork_graph);
}
static void inlineForkedClosures(Block* block) {
void inlineForkedClosures(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end();) {
Node* n = *it;
it++;

View File

@ -30,7 +30,7 @@ GraphFunction* tryToGraphFunction(Node* n) {
return nullptr;
}
static void inlineCalls(Block* block) {
void inlineCalls(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end();
it != end;) {
Node* cur = *it++;

View File

@ -3,7 +3,7 @@
namespace torch {
namespace jit {
static void CheckInplace(Block* block) {
void CheckInplace(Block* block) {
for (auto node : block->nodes()) {
if (node->kind() == prim::PythonOp && node->hasAttribute(attr::inplace)) {
if (node->i(attr::inplace)) {

View File

@ -16,7 +16,7 @@ namespace jit {
// closure block.
// Within the closure subgraph, the context tuple is unpacked and the unpacked
// values are used for closed over values.
static void liftClosure(Node* closure) {
void liftClosure(Node* closure) {
auto block = closure->blocks().at(0);
auto subgraph = std::make_shared<Graph>();
// closures/forks can be nested, so use closure owning graph
@ -56,7 +56,7 @@ static void liftClosure(Node* closure) {
runCleanupPasses(closure->g(attr::Subgraph));
}
static void liftClosures(Block* block) {
void liftClosures(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end();) {
Node* n = *it;
it++;

View File

@ -21,7 +21,7 @@ struct Slot {
// parameters/attributes with extra_ivalue input Slots that hold what value to
// pass into the graph. Used for ONNX export to remove first-class modules
// so it can deal purely with parameters and inputs
static std::pair<std::shared_ptr<Graph>, std::vector<Slot>> lower_graph(
std::pair<std::shared_ptr<Graph>, std::vector<Slot>> lower_graph(
const ModulePtr& self,
Graph& g_,
size_t self_offset = 0) {

View File

@ -240,12 +240,31 @@ void metalFusePrePackedConvWithClamp(script::Module& module) {
fuseHardtanhWithPackedOps(graph);
}
static void metalRemoveMutation(script::Module& module) {
void metalInsertCopyOps(script::Module& module) {
auto graph = module.get_method("forward").graph();
auto&& outputs = graph->outputs();
for (const auto i : c10::irange(outputs.size())) {
Value* output = outputs[i];
auto namedValue = NamedValue("", output);
if (namedValue.type()->kind() == TypeKind::TensorType) {
// find the insertion point
WithInsertPoint ip(output->node()->next());
Value* replaced_output = graph->insert(
Symbol::fromQualString("metal::copy_to_host"), {namedValue});
// replaced the output
graph->block()->replaceOutput(i, replaced_output);
}
}
SubgraphRewriter rewriter;
rewriter.runOnGraph(graph);
}
void metalRemoveMutation(script::Module& module) {
auto graph = module.get_method("forward").graph();
RemoveTensorMutation(graph);
}
static void metalRunCanonicalOptimizations(script::Module& module) {
void metalRunCanonicalOptimizations(script::Module& module) {
auto graph = module.get_method("forward").graph();
runOptimization(graph, false /* no loop unrolling */);
}

View File

@ -21,7 +21,7 @@ GraphPassNameType registerPostPass(GraphPass p) {
return graphPassID++;
}
static GraphPassNameType registerPass(GraphPass p) {
GraphPassNameType registerPass(GraphPass p) {
return registerPostPass(std::move(p));
}

View File

@ -332,7 +332,7 @@ struct PeepholeOptimizeImpl {
bool shape_peepholes_;
};
static bool FuseAddMM(Block* block) {
bool FuseAddMM(Block* block) {
bool changed = false;
for (Node* node : block->nodes()) {
// XXX: remember that if you want to simplify an expression by combining

View File

@ -15,7 +15,7 @@
namespace torch {
namespace jit {
static c10::optional<size_t> normalizeIndex(int64_t index, size_t len) {
c10::optional<size_t> normalizeIndex(int64_t index, size_t len) {
if (index < 0) {
index = index + len;
}

View File

@ -1,5 +1,4 @@
#include <torch/csrc/jit/passes/peephole.h>
#include <torch/csrc/jit/passes/peephole_non_tensor.h>
#include <ATen/core/jit_type.h>
#include <c10/util/irange.h>

View File

@ -168,7 +168,7 @@ void FoldQuantizedPrepackingOps(Module& module) {
PrePackingOpsFolder(module, filter_fn, "quantized");
}
static std::unordered_set<std::string> RegisterPrePackingParams(
std::unordered_set<std::string> RegisterPrePackingParams(
Module& module,
const std::string& method_name) {
auto filter_fn = [](const Node* n) -> bool {

View File

@ -253,7 +253,7 @@ bool matchCallFuncToUse(
// Check any use of `v` matches the aten function call
// or CallFunction patterns
static bool matchArgPattern(
bool matchArgPattern(
Value* v,
const AtenFuncArgs& aten_func_args,
const CallFuncArgs& call_func_args) {
@ -395,8 +395,7 @@ std::vector<Value*> getPassThroughInputs(Value* v) {
return {};
}
static std::vector<NodeKind> toAtenSymbol(
const std::vector<std::string>& func_names) {
std::vector<NodeKind> toAtenSymbol(const std::vector<std::string>& func_names) {
std::vector<NodeKind> symbols;
std::transform(
func_names.begin(),
@ -406,18 +405,18 @@ static std::vector<NodeKind> toAtenSymbol(
return symbols;
}
static bool isAtenFunc(Node* n, const std::vector<NodeKind>& aten_funcs) {
bool isAtenFunc(Node* n, const std::vector<NodeKind>& aten_funcs) {
return std::find(aten_funcs.begin(), aten_funcs.end(), n->kind()) !=
aten_funcs.end();
}
static bool isAtenFunc(Node* n, const std::vector<std::string>& aten_funcs) {
bool isAtenFunc(Node* n, const std::vector<std::string>& aten_funcs) {
const auto& symbols = toAtenSymbol(aten_funcs);
return isAtenFunc(n, symbols);
}
// TODO: factor out isCallFunc
static bool isFunctionNode(
bool isFunctionNode(
Node* n,
const std::vector<std::string>& call_funcs,
const std::vector<std::string>& aten_funcs) {
@ -670,7 +669,7 @@ bool is_int_constant(
return v && v->isInt() && v->toInt() == value;
}
static bool is_functional(
bool is_functional(
const Match& match,
const std::unordered_map<std::string, Value*>& vmap,
const std::string& vname,
@ -694,7 +693,7 @@ c10::optional<std::string> getModuleName(Value* value) {
return c10::nullopt;
}
static bool is_module(
bool is_module(
const Match& match,
const std::unordered_map<std::string, Value*>& vmap,
const std::string& vname,

View File

@ -282,7 +282,7 @@ QuantFusionInfo getObservedQParamOpFusionInfo(
} // namespace
static std::vector<QuantFusionInfo> quant_fusion_pattern_and_replacements() {
std::vector<QuantFusionInfo> quant_fusion_pattern_and_replacements() {
// aten::conv1d
std::string conv1d = R"(
graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups):
@ -1105,8 +1105,7 @@ graph(%packed_params, %a):
};
}
static std::vector<QuantFusionInfo>
dynamic_quant_fusion_pattern_and_replacements() {
std::vector<QuantFusionInfo> dynamic_quant_fusion_pattern_and_replacements() {
std::string linear_dynamic = R"(
graph(%packed_params, %a, %reduce_range, %a_dtype):
%a_scale : float, %a_zero_point : int = aten::_choose_qparams_per_tensor(%a, %reduce_range)
@ -1143,7 +1142,7 @@ graph(%packed_params, %a):
};
}
static std::vector<QuantFusionInfo> linear_prepack_unpack_patterns() {
std::vector<QuantFusionInfo> linear_prepack_unpack_patterns() {
std::string linear_with_quant = R"(
graph(%a_dequant, %w_quant, %b):
%w_dequant = aten::dequantize(%w_quant)
@ -1179,7 +1178,7 @@ graph(%w, %a_dq, %b):
};
}
static std::vector<QuantFusionInfo> conv_prepack_unpack_patterns() {
std::vector<QuantFusionInfo> conv_prepack_unpack_patterns() {
std::string conv1d_with_quant = R"(
graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups):
%w_dequant = aten::dequantize(%w_quant)

View File

@ -7,7 +7,7 @@
namespace torch {
namespace jit {
static bool certainlyThrows(Block* block) {
bool certainlyThrows(Block* block) {
for (Node* n : block->nodes()) {
if (n->kind() == prim::RaiseException) {
return true;
@ -16,7 +16,7 @@ static bool certainlyThrows(Block* block) {
return false;
}
static void EliminateExceptions(Block* block) {
void EliminateExceptions(Block* block) {
auto graph = block->owningGraph();
Value* false_const = graph->insertConstant(IValue(false));
Value* true_const = graph->insertConstant(IValue(true));

View File

@ -75,7 +75,7 @@ Node* MutationRemover::createSpecialMappedOp(Node* n) {
return new_node;
}
static bool removableSetItem(Node* n) {
bool removableSetItem(Node* n) {
if (n->kind() != aten::_set_item ||
n->input(1)->node()->kind() != prim::Constant) {
return false;

View File

@ -1,5 +1,4 @@
#include <torch/csrc/jit/passes/dead_code_elimination.h>
#include <torch/csrc/jit/passes/remove_redundant_profiles.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/ir_views.h>

View File

@ -50,7 +50,7 @@ bool mergeTypes(
return changed;
}
static void applyTypes(ArrayRef<Value*> src, ArrayRef<Value*> dst) {
void applyTypes(ArrayRef<Value*> src, ArrayRef<Value*> dst) {
AT_ASSERT(src.size() == dst.size());
for (const auto i : c10::irange(src.size())) {
dst[i]->setType(src[i]->type());

View File

@ -103,7 +103,7 @@ struct ShapeArg
}
};
static std::ostream& operator<<(std::ostream& out, const ShapeArg& sa) {
std::ostream& operator<<(std::ostream& out, const ShapeArg& sa) {
if (auto val = sa.asConstantInt()) {
out << *val;
} else if (auto ss = sa.asShapeSymbol()) {
@ -149,7 +149,7 @@ struct ShapeArguments {
std::vector<ShapeArg> maybe_shape_symbols_;
};
static std::ostream& operator<<(std::ostream& os, const ShapeArguments& sa) {
std::ostream& operator<<(std::ostream& os, const ShapeArguments& sa) {
if (!sa.has_dim()) {
os << "(UNKNOWN DIM)";
return os;
@ -176,7 +176,7 @@ bool symbolicShapeAnalysisTestModeEnabled() {
using SSArgument = c10::variant<ShapeArguments, IValue>;
static std::ostream& operator<<(std::ostream& out, const SSArgument& sa) {
std::ostream& operator<<(std::ostream& out, const SSArgument& sa) {
if (const IValue* iv = c10::get_if<IValue>(&sa)) {
out << *iv;
} else {

View File

@ -20,7 +20,7 @@ namespace jit {
// Inserts the Compute for Each Symbolic Shape in the TensorExpr Graph
// and returns back a map from Symbolic Shape Value to its runtime Value *
static std::map<int64_t, Value*> InsertSymbolicShapesCompute(
std::map<int64_t, Value*> InsertSymbolicShapesCompute(
const ShapeComputeGraphMapping& shape_mapping,
Node* tensorexpr_graph) {
WithInsertPoint guard(tensorexpr_graph);
@ -140,7 +140,7 @@ inline StrideInput summarizeStrideDim(
}
}
static std::vector<StrideInput> summarizeInputStrides(const TensorType& tt) {
std::vector<StrideInput> summarizeInputStrides(const TensorType& tt) {
auto strides = *tt.strides().concrete_sizes();
auto sizes = *tt.sizes().concrete_sizes();
if (c10::is_contiguous_strides(sizes, strides)) {
@ -158,7 +158,7 @@ static std::vector<StrideInput> summarizeInputStrides(const TensorType& tt) {
};
// Todo: incorporate in codegen
static StrideInput summarizeOutputStrides(const TensorType& tt) {
StrideInput summarizeOutputStrides(const TensorType& tt) {
auto strides = *tt.strides().concrete_sizes();
auto sizes = *tt.sizes().concrete_sizes();
// We only try to maintain output striding for channels last tensors,
@ -178,7 +178,7 @@ static StrideInput summarizeOutputStrides(const TensorType& tt) {
// Also summarize input striding behavior. The Size information is stored on the
// type, The striding is returned. See StrideInput for description of stride
// specializations
static c10::optional<std::vector<std::vector<StrideInput>>>
c10::optional<std::vector<std::vector<StrideInput>>>
TryGeneralizeInputDimensionsToSymbolicShapes(
std::shared_ptr<Graph> tensorexpr_graph) {
std::map<size_t, int64_t> shape_to_sym_shape;
@ -212,7 +212,7 @@ TryGeneralizeInputDimensionsToSymbolicShapes(
return input_striding;
}
static void moveConstantTensorsOutOfSubgraph(
void moveConstantTensorsOutOfSubgraph(
Node* tensorexpr_graph_node,
std::shared_ptr<Graph> tensorexpr_graph) {
auto parent = tensorexpr_graph_node->owningGraph();
@ -304,7 +304,7 @@ bool GenerateGuard(Node* tensorexpr_graph_node, bool add_composed_op) {
return true;
}
static void inlineFallbackGraphAndAddSRCopyOutOp(std::shared_ptr<Graph> graph) {
void inlineFallbackGraphAndAddSRCopyOutOp(std::shared_ptr<Graph> graph) {
DepthFirstGraphNodeIterator it(graph);
Node* n = nullptr;
@ -495,7 +495,7 @@ void insertDynamicShapesGuard(
// tensors
// Note: this logic is meant to reflect the invocation of the TE Kernel
// and `runWithAllocatedOutputs` in tensorexpr_fuser.cpp
static Operation StaticRuntimeCopyOuts(const Node* node) {
Operation StaticRuntimeCopyOuts(const Node* node) {
auto num_ten_inputs = node->inputs().size();
return [num_ten_inputs](Stack& stack) {
std::vector<IValue> inputs = pop(stack, num_ten_inputs);
@ -721,7 +721,7 @@ void runTensorExprDynamicGroup(const Code& code, Stack& stack) {
interpreter.run(stack);
}
static Operation createTensorExprDynamicGroup(const Node* node) {
Operation createTensorExprDynamicGroup(const Node* node) {
const auto& graph = node->g(attr::Subgraph);
Code code(graph, "");
// This implementation creates a Code object and InterpreterState on every

View File

@ -43,7 +43,7 @@ namespace jit {
static bool texpr_reductions_enabled = false;
static bool isSupportedForBlock(Node* node) {
bool isSupportedForBlock(Node* node) {
switch (node->kind()) {
case aten::add:
case aten::mul:
@ -187,7 +187,7 @@ bool texprReductionsEnabled() {
return texpr_reductions_enabled;
}
static void removeProfileNodesAndSpecializeTypes(Block* b) {
void removeProfileNodesAndSpecializeTypes(Block* b) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); it++) {
if (it->kind() == prim::profile) {
GRAPH_DEBUG("Removing prim::profile: %", it->output()->debugName());
@ -275,7 +275,7 @@ bool hasTensorTypeSpecialization(Value* v) {
return true;
}
static void removeTensorTypeSpecialization(Value* v) {
void removeTensorTypeSpecialization(Value* v) {
if (hasTensorTypeSpecialization(v)) {
v->setType(TensorType::get());
}
@ -1364,7 +1364,7 @@ void FuseTensorExprs(
GRAPH_DUMP("After TExprFuser: ", graph);
}
static Operation createTensorExprOp(const Node* node) {
Operation createTensorExprOp(const Node* node) {
bool dynamic_shape_fusion_node =
node->hasAttribute(attr::striding_inputs_desc);
if (!dynamic_shape_fusion_node) {

View File

@ -6,7 +6,7 @@
namespace torch {
namespace jit {
static void UpdateDifferentiableGraphRequiresGrad(
void UpdateDifferentiableGraphRequiresGrad(
Block* block,
c10::optional<bool> new_requires_grad) {
for (Node* n : block->nodes()) {

View File

@ -227,7 +227,7 @@ void unmergeSubgraph(Node* subgraphNode) {
subgraphNode->destroy();
}
static void collectNestedUses(
void collectNestedUses(
std::unordered_set<Value*>& closed_over_values,
std::unordered_set<Value*>& new_values,
std::unordered_map<Value*, Value*>& externalValuesMap,
@ -271,7 +271,7 @@ static void collectNestedUses(
}
}
static std::unordered_set<Value*> closedOverValues(
std::unordered_set<Value*> closedOverValues(
Node* toMerge,
std::unordered_map<Value*, Value*>& externalValuesMap) {
std::unordered_set<Value*> closed_over_values;
@ -602,7 +602,7 @@ void unmergeNode(Node* n, Node* subgraphNode) {
n->destroy();
}
static std::string truncateStrWithHash(const std::string& s, size_t maxlen) {
std::string truncateStrWithHash(const std::string& s, size_t maxlen) {
if (s.size() <= maxlen) {
return s;
}

View File

@ -399,12 +399,12 @@ void vulkanFoldPrePackingOps(script::Module& m) {
PrePackingOpsFolder(m, filter_fn, "prepack_folding");
}
static void vulkanRemoveMutation(script::Module& module) {
void vulkanRemoveMutation(script::Module& module) {
auto graph = module.get_method("forward").graph();
RemoveTensorMutation(graph);
}
static void vulkanRunCanonicalOptimizations(script::Module& module) {
void vulkanRunCanonicalOptimizations(script::Module& module) {
auto graph = module.get_method("forward").graph();
for (const auto& method : module.get_methods()) {
auto method_graph = method.graph();

View File

@ -22,13 +22,19 @@ namespace jit {
using value_map = std::unordered_map<Value*, Value*>;
using value_set = std::unordered_set<Value*>;
void wrapDim(int64_t& dim, const std::vector<int64_t>& sizes) {
if (dim < 0) {
dim += sizes.size();
}
}
// need_trim_grad_ops contains functions that return multiple outputs in
// forward, but only the first one requires grad.
// Example:
// kthvalue returns (kthvalue, index of kthvalue), currently autodiff only
// supports at most one output that requires grad. Thus we need to remove
// the grad for index that doesn't require grad.
static bool needTrimGrad(Node* n) {
bool needTrimGrad(Node* n) {
static OperatorSet need_trim_grad_ops = {
"aten::kthvalue(Tensor self, int k, int dim, bool keepdim) -> (Tensor, Tensor)",
"aten::topk(Tensor self, int k, int dim, bool largest, bool sorted) -> (Tensor, Tensor)",
@ -829,7 +835,7 @@ static void lambdaLiftReverse(Gradient& grad_desc, ReverseDetails& rev_info) {
reverse_block->owningNode()->destroy();
}
static void packReturnValuesIntoTuple(const std::shared_ptr<Graph>& graph) {
void packReturnValuesIntoTuple(const std::shared_ptr<Graph>& graph) {
auto returnNode = graph->block()->return_node();
WithInsertPoint wip(returnNode);
auto tuple = graph->insertNode(graph->createTuple(returnNode->inputs()));

View File

@ -70,7 +70,7 @@ void loadDecompositionFunctions() {
} // anonymous namespace
static void DecomposeOp(Node* n) {
void DecomposeOp(Node* n) {
auto schema = n->maybeSchema();
if (!schema) {
return;
@ -89,7 +89,7 @@ static void DecomposeOp(Node* n) {
n->destroy();
}
static void RunDecompositions(Block* block) {
void RunDecompositions(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end();) {
Node* n = *it;
it++; // advance iterator bc the current node may be destroyed

View File

@ -5,7 +5,7 @@
namespace torch {
namespace jit {
static std::ostream& operator<<(std::ostream& out, OpCode op) {
std::ostream& operator<<(std::ostream& out, OpCode op) {
switch (op) {
#define OP_STRING(x, _) \
case x: \
@ -27,7 +27,7 @@ char const* toString(OpCode op) {
return nullptr;
}
static const char* OpInfo(OpCode op) {
const char* OpInfo(OpCode op) {
switch (op) {
#define OP_INFO(x, info) \
case x: \

View File

@ -95,7 +95,6 @@ std::ostream& operator<<(std::ostream& out, Instruction inst);
bool isOpSupportedInMobile(OpCode op);
char const* toString(OpCode op);
OpCode parseOpCode(const char* str);
std::ostream& operator<<(std::ostream& out, Instruction inst);
} // namespace jit

View File

@ -83,7 +83,7 @@ static std::atomic<bool> profiling_mode{true};
static std::mutex fusion_strategy_lock;
static FusionStrategy getInitialStrategy() {
FusionStrategy getInitialStrategy() {
if (FLAGS_torch_jit_always_dynamic) {
return {{FusionBehavior::DYNAMIC, 12}};
}
@ -245,7 +245,7 @@ static C10_UNUSED void setRequiresGradOnDiffGraph(Node* dnode) {
}
}
static bool guardDifferentiableGraph(Node* dnode) {
bool guardDifferentiableGraph(Node* dnode) {
auto gi = dnode->g(attr::Subgraph)->inputs();
bool all_inputs_seen = true;
for (const auto i : c10::irange(gi.size())) {
@ -323,7 +323,7 @@ void runNooptPassPipeline(std::shared_ptr<Graph>& graph) {
"After EliminateDeadCode (end of runNooptPassPipeline)\n", *graph);
}
static void runPreAutodiffPassPipeline(std::shared_ptr<Graph>& graph) {
void runPreAutodiffPassPipeline(std::shared_ptr<Graph>& graph) {
GRAPH_DEBUG(
"Before InsertGuards (beginning of runPreAutodiffPassPipeline)\n",
*graph);
@ -700,7 +700,7 @@ GraphExecutorState ProfilingGraphExecutorImpl::getDebugState() {
return state;
}
static Node* insertFallbackFunctionCall(
Node* insertFallbackFunctionCall(
Graph* graph,
GraphFunction* func,
ArrayRef<Value*> inputs) {
@ -721,7 +721,7 @@ static Node* insertFallbackFunctionCall(
return fun_unpack_tuple;
}
static GraphFunction* createFallbackPathFunction(
GraphFunction* createFallbackPathFunction(
Block* b,
const std::string& function_name) {
auto value_map = [](Value* v) { return v; };

Some files were not shown because too many files have changed in this diff.