TH: Clean up dead code (#60655)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/60655

Test Plan: Imported from OSS

Reviewed By: albanD

Differential Revision: D29371717

Pulled By: ngimel

fbshipit-source-id: faa71b1d4a15450c78e12aa917daec853057bce9
This commit is contained in:
Peter Bell 2021-06-24 19:39:36 -07:00 committed by Facebook GitHub Bot
parent 4a7d281119
commit 42c8439b6e
25 changed files with 1 addition and 2266 deletions

View File

@@ -332,11 +332,9 @@ filegroup(
filegroup(
name = "th_srcs",
srcs = [
"aten/src/TH/THBlas.cpp",
"aten/src/TH/THGeneral.cpp",
"aten/src/TH/THStorageFunctions.cpp",
"aten/src/TH/THTensor.cpp",
"aten/src/TH/THTensorMoreMath.cpp",
],
)
@@ -546,10 +544,6 @@ header_template_rule(
src = "aten/src/TH/THGeneral.h.in",
out = "aten/src/TH/THGeneral.h",
substitutions = {
"#cmakedefine USE_BLAS": "#define USE_BLAS",
"#cmakedefine USE_LAPACK": "#define USE_LAPACK",
"#cmakedefine BLAS_F2C": "/* #undef BLAS_F2C */",
"#cmakedefine BLAS_USE_CBLAS_DOT": "#define BLAS_USE_CBLAS_DOT",
},
)

View File

@@ -1,15 +1,12 @@
set(Aten_TH_AVX_extra_src)
set(hdr
THGeneral.h THHalf.h THStorage.h THStorageFunctions.h THTensor.h THTensorApply.h THBlas.h
THVector.h )
THGeneral.h THHalf.h THStorage.h THStorageFunctions.h THTensor.h)
set(ATen_TH_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/THGeneral.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensorMoreMath.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THBlas.cpp
)
# Remember that PARENT_SCOPE variables are not in the current scope
set(ATen_TH_SRCS ${ATen_TH_SRCS} PARENT_SCOPE)
@@ -36,7 +33,6 @@ configure_file(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h")
install(FILES
TH.h
THBlas.h
${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h
THGenerateAllTypes.h
THGenerateBFloat16Type.h
@@ -62,17 +58,12 @@ install(FILES
THStorage.h
THStorageFunctions.h
THTensor.h
THTensorApply.h
THTensorDimApply.h
THVector.h
THHalf.h
THTensor.hpp
THStorageFunctions.hpp
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/TH")
install(FILES
generic/THBlas.cpp
generic/THBlas.h
generic/THStorage.cpp
generic/THStorage.h
generic/THStorageCopy.cpp
@@ -80,8 +71,5 @@ install(FILES
generic/THTensor.cpp
generic/THTensor.h
generic/THTensor.hpp
generic/THTensorMath.h
generic/THVector.h
# See Note [TH abstraction violation]
generic/THTensorFastGetSet.hpp
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/TH/generic")

View File

@@ -3,11 +3,7 @@
#include <TH/THGeneral.h>
#include <TH/THBlas.h>
#include <TH/THVector.h>
#include <TH/THStorageFunctions.h>
#include <TH/THTensor.h>
#include <TH/THTensorApply.h>
#include <TH/THTensorDimApply.h>
#endif

View File

@@ -1,13 +0,0 @@
#include <TH/THBlas.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THBlas.cpp>
#include <TH/THGenerateAllTypes.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THBlas.cpp>
#include <TH/THGenerateBFloat16Type.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THBlas.cpp>
#include <TH/THGenerateHalfType.h>

View File

@@ -1,17 +0,0 @@
#ifndef TH_BLAS_INC
#define TH_BLAS_INC
#include <TH/THGeneral.h>
#define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME)
#include <TH/generic/THBlas.h>
#include <TH/THGenerateAllTypes.h>
#include <TH/generic/THBlas.h>
#include <TH/THGenerateBFloat16Type.h>
#include <TH/generic/THBlas.h>
#include <TH/THGenerateHalfType.h>
#endif

View File

@@ -68,21 +68,6 @@ void _THAssertionFailed(const char *file, const int line, const char *exp, const
_THError(file, line, "Assertion `%s' failed. %s", exp, msg);
}
void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data)
{
threadErrorHandler = new_handler;
threadErrorHandlerData = data;
}
void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data)
{
if (new_handler)
defaultErrorHandler = new_handler;
else
defaultErrorHandler = defaultErrorHandlerFunction;
defaultErrorHandlerData = data;
}
/* Torch Arg Checking Handling */
static void defaultArgErrorHandlerFunction(int argNumber, const char *msg, void *data)
{
@@ -125,42 +110,6 @@ void _THArgCheck(const char *file, int line, int condition, int argNumber, const
}
}
void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)
{
threadArgErrorHandler = new_handler;
threadArgErrorHandlerData = data;
}
void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)
{
if (new_handler)
defaultArgErrorHandler = new_handler;
else
defaultArgErrorHandler = defaultArgErrorHandlerFunction;
defaultArgErrorHandlerData = data;
}
// NOLINTNEXTLINE(modernize-use-nullptr,cppcoreguidelines-avoid-non-const-global-variables)
static __thread void (*torchGCFunction)(void *data) = NULL;
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
static __thread void *torchGCData;
/* Optional hook for integrating with a garbage-collected frontend.
*
* If torch is running with a garbage-collected frontend (e.g. Lua),
* the GC isn't aware of TH-allocated memory so may not know when it
* needs to run. These hooks trigger the GC to run in two cases:
*
* (1) When a memory allocation (malloc, realloc, ...) fails
* (2) When the total TH-allocated memory hits a dynamically-adjusted
* soft maximum.
*/
void THSetGCHandler( void (*torchGCFunction_)(void *data), void *data )
{
torchGCFunction = torchGCFunction_;
torchGCData = data;
}
void* THAlloc(ptrdiff_t size)
{
if(size < 0)
@@ -169,63 +118,7 @@ void* THAlloc(ptrdiff_t size)
return c10::alloc_cpu(size);
}
void* THRealloc(void *ptr, ptrdiff_t size)
{
if(!ptr)
return(THAlloc(size));
if(size == 0)
{
THFree(ptr);
// NOLINTNEXTLINE(modernize-use-nullptr)
return NULL;
}
if(size < 0)
THError("$ Torch: invalid memory size -- maybe an overflow?");
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
void *newptr = realloc(ptr, size);
if(!newptr && torchGCFunction) {
torchGCFunction(torchGCData);
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
newptr = realloc(ptr, size);
}
if(!newptr)
THError("$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824);
return newptr;
}
void THFree(void *ptr)
{
c10::free_cpu(ptr);
}
THDescBuff _THSizeDesc(const int64_t *size, const int64_t ndim) {
const int L = TH_DESC_BUFF_LEN;
THDescBuff buf;
char *str = buf.str;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t i;
int64_t n = 0;
n += snprintf(str, L-n, "[");
for (i = 0; i < ndim; i++) {
if (n >= L) break;
n += snprintf(str+n, L-n, "%" PRId64, size[i]);
if (i < ndim-1) {
n += snprintf(str+n, L-n, " x ");
}
}
if (n < L - 2) {
snprintf(str+n, L-n, "]");
} else {
snprintf(str+L-5, 5, "...]");
}
return buf;
}
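
The removed THRealloc/THSetGCHandler pair implemented a simple pattern for garbage-collected frontends: if an allocation fails and a GC hook is registered, trigger the hook once and retry before raising an error. A minimal standalone C++ sketch of that pattern (illustrative names, not a surviving PyTorch API):

#include <cstdlib>
#include <new>

using GCHook = void (*)(void* data);
static thread_local GCHook g_gc_hook = nullptr;  // analogous to the removed torchGCFunction
static thread_local void* g_gc_data = nullptr;

void setGCHook(GCHook hook, void* data) {        // analogous to the removed THSetGCHandler
  g_gc_hook = hook;
  g_gc_data = data;
}

void* reallocWithGCRetry(void* ptr, std::size_t size) {
  void* p = std::realloc(ptr, size);
  if (!p && g_gc_hook) {
    g_gc_hook(g_gc_data);                        // let the frontend GC release memory
    p = std::realloc(ptr, size);                 // then retry once
  }
  if (!p && size != 0) {
    throw std::bad_alloc();                      // the removed code called THError here
  }
  return p;
}

The error-handler setters removed alongside it (THSetErrorHandler, THSetDefaultErrorHandler, and their Arg variants) followed the same store-a-thread-local-callback shape.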

View File

@@ -21,11 +21,6 @@
#include <mkl_vsl.h>
#endif
#cmakedefine USE_BLAS
#cmakedefine USE_LAPACK
#cmakedefine BLAS_F2C
#cmakedefine BLAS_USE_CBLAS_DOT
# define TH_EXTERNC extern "C"
// Note(jiayq): copied from ATen/core/Macros.h. Because internal build of TH
@@ -72,26 +67,12 @@
typedef void (*THErrorHandlerFunction)(const char *msg, void *data);
typedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data);
#define TH_DESC_BUFF_LEN 64
typedef struct {
char str[TH_DESC_BUFF_LEN];
} THDescBuff;
TH_API THDescBuff _THSizeDesc(const int64_t *size, const int64_t ndim);
TH_API TH_NO_RETURN void _THError(const char *file, const int line, const char *fmt, ...);
TH_API void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...);
TH_API void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data);
TH_API void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data);
TH_API void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...);
TH_API void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);
TH_API void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);
TH_API void* THAlloc(ptrdiff_t size);
TH_API void* THRealloc(void *ptr, ptrdiff_t size);
TH_API void THFree(void *ptr);
TH_API void THSetGCHandler( void (*torchGCHandlerFunction)(void *data), void *data );
// this hook should only be called by custom allocator functions
TH_API void THHeapUpdate(ptrdiff_t size);
#define THError(...) _THError(__FILE__, __LINE__, __VA_ARGS__)

View File

@@ -36,42 +36,3 @@ void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageO
c10::raw::intrusive_ptr::incref(storage_);
THTensor_wrap(self).set_(at::Storage(c10::intrusive_ptr<at::StorageImpl>::reclaim(storage_)), storageOffset_, size_, stride_);
}
void THTensor_resize(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride)
{
if (stride.data()) {
THArgCheck(stride.size() == size.size(), 3, "invalid stride");
}
#ifdef DEBUG
THAssert(size.size() <= INT_MAX);
#endif
THTensor_resizeNd(self, size.size(), size.data(), stride.data());
}
void THTensor_resizeNd(THTensor *self, int nDimension, const int64_t *size, const int64_t *stride)
{
TORCH_CHECK(nDimension >= 0, "resizeNd nDimension must be non-negative");
at::IntArrayRef sizes(size, nDimension);
at::optional<at::IntArrayRef> strides;
if (stride) {
strides = at::IntArrayRef(stride, nDimension);
}
at::native::resize_impl_cpu_(self, sizes, strides);
}
// NB: Steals ownership of storage
void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage) {
// Caffe2 might have tensors whose storages are null, but we
// don't allow it in PyTorch.
AT_ASSERT(storage);
// We used to allow this, but this breaks device caching.
// Let's put an actual error message for this one.
TORCH_CHECK(tensor->storage().device() == storage->device(),
"Attempted to set the storage of a tensor on device \"", tensor->storage().device(),
"\" to a storage on different device \"", storage->device(),
"\". This is no longer allowed; the devices must match.");
tensor->set_storage_keep_dtype(
at::Storage(c10::intrusive_ptr<THStorage>::reclaim(storage)));
}

View File

@@ -2,7 +2,6 @@
#define TH_TENSOR_INC
#include <TH/THStorageFunctions.h>
#include <TH/THTensorApply.h>
#define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)
@@ -21,20 +20,4 @@
#include <TH/generic/THTensor.h>
#include <TH/THGenerateBFloat16Type.h>
/* maths */
#include <TH/generic/THTensorMath.h>
#include <TH/THGenerateAllTypes.h>
#include <TH/generic/THTensorMath.h>
#include <TH/THGenerateBoolType.h>
#include <TH/generic/THTensorMath.h>
#include <TH/THGenerateHalfType.h>
#include <TH/generic/THTensorMath.h>
#include <TH/THGenerateBFloat16Type.h>
#include <TH/generic/THTensorMath.h>
#include <TH/THGenerateComplexTypes.h>
#endif

View File

@@ -82,14 +82,6 @@ inline int64_t THTensor_sizeLegacyNoScalars(const THTensor *self, int dim)
return self->dim() == 0 ? 1 : self->size(dim);
}
#include <TH/generic/THTensorFastGetSet.hpp>
#include <TH/THGenerateAllTypes.h>
#include <TH/generic/THTensorFastGetSet.hpp>
#include <TH/THGenerateComplexTypes.h>
#include <TH/generic/THTensorFastGetSet.hpp>
#include <TH/THGenerateBFloat16Type.h>
inline std::vector<int64_t> THTensor_sizesLegacyNoScalars(const THTensor *self) {
if (self->dim() == 0) {
@@ -98,20 +90,7 @@ inline std::vector<int64_t> THTensor_sizesLegacyNoScalars(const THTensor *self)
return self->sizes().vec();
}
}
inline std::vector<int64_t> THTensor_stridesLegacyNoScalars(const THTensor *self) {
if (self->dim() == 0) {
return {1};
} else {
return self->strides().vec();
}
}
// NB: Steals ownership of storage
TH_API void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage);
TH_API void THTensor_free(THTensor *self);
TH_API void THTensor_resizeNd(THTensor *self, int nDimension, const int64_t *size, const int64_t *stride);
TH_CPP_API void THTensor_resize(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride);
TH_CPP_API void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_);

View File

@@ -1,309 +0,0 @@
#ifndef TH_TENSOR_APPLY_INC
#define TH_TENSOR_APPLY_INC
#include <ATen/Parallel.h>
/*
* The basic strategy for apply is as follows:
*
* 1. Starting with the outermost index, loop until we reach a dimension where the
* data is no longer contiguous, i.e. the stride at that dimension is not equal to
* the size of the tensor defined by the outer dimensions. Let's call this outer
* (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal
* to the entire Tensor. Let's call the inner tensor B.
*
* 2. We loop through the indices in B, starting at its outermost dimension. For
* example, if B is a 2x2 matrix, then we do:
*
* B[0][0]
* B[0][1]
* B[1][0]
* B[1][1]
*
* We set the offset into the underlying storage as (storageOffset + stride_B * index_B),
* i.e. basically we compute the offset into the storage as we would normally for a
* Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we
* can simply loop for sizeof(A) iterations and perform the operation, without having to
* follow the order described by the strides of A.
*
* 3. As an optimization, we merge dimensions of A that are contiguous in memory. For
* example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two
* dimensions can be merged for the purposes of APPLY, reducing the number of nested
* loops.
*/
#define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \
TYPE *TENSOR##_data = NULL; \
int64_t *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \
int64_t TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \
int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \
TENSOR##_n = 1; \
for(TENSOR##_i = 0; TENSOR##_i < TENSOR->dim(); TENSOR##_i++) \
TENSOR##_n *= TENSOR->size(TENSOR##_i); \
\
if(TENSOR->is_empty()) \
TH_TENSOR_APPLY_hasFinished = 1; \
else \
{ \
TENSOR##_data = THTensor_getStoragePtr(TENSOR)->data<TYPE>()+TENSOR->storage_offset(); \
TENSOR##_size = 1; \
TENSOR##_stride = 1; \
for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-1; TENSOR##_i >= 0; TENSOR##_i--) { \
if(THTensor_sizeLegacyNoScalars(TENSOR, TENSOR##_i) != 1) { \
if(THTensor_strideLegacyNoScalars(TENSOR, TENSOR##_i) == TENSOR##_size && TENSOR##_i != DIM) \
TENSOR##_size *= THTensor_sizeLegacyNoScalars(TENSOR, TENSOR##_i); \
else{ \
TENSOR##_contiguous = 0; \
break; \
} \
} \
} \
if (!TENSOR##_contiguous) { \
/* Find the dimension of contiguous sections */ \
TENSOR##_dim = 1; \
for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-2; TENSOR##_i >= 0; TENSOR##_i--) \
{ \
if(TENSOR->stride(TENSOR##_i) != TENSOR->stride(TENSOR##_i+1) * TENSOR->size(TENSOR##_i+1) || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \
TENSOR##_dim++; \
} \
/* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \
TENSOR##_counter = (int64_t*)THAlloc(sizeof(int64_t)*(3*TENSOR##_dim)); \
TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \
TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \
TH_TENSOR_dim_index = TENSOR##_dim-1; \
TENSOR##_dimOffset = (DIM == THTensor_nDimensionLegacyAll(TENSOR)-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \
TENSOR##_sizes[TH_TENSOR_dim_index] = THTensor_sizeLegacyNoScalars(TENSOR, THTensor_nDimensionLegacyAll(TENSOR)-1); \
TENSOR##_strides[TH_TENSOR_dim_index] = THTensor_strideLegacyNoScalars(TENSOR, THTensor_nDimensionLegacyAll(TENSOR)-1); \
/* TENSOR##_counter tracks where we are in the storage. The offset into the */ \
/* storage is given by storage_offset + (i * j), where i is the stride */ \
/* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \
for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \
TENSOR##_counter[TENSOR##_i] = 0; \
} \
for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-2; TENSOR##_i >= 0; --TENSOR##_i) { \
if (TENSOR->stride(TENSOR##_i) == TENSOR->stride(TENSOR##_i+1) * TENSOR->size(TENSOR##_i+1) && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \
TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size(TENSOR##_i) * TENSOR##_sizes[TH_TENSOR_dim_index]; \
if (DIM != THTensor_nDimensionLegacyAll(TENSOR)-1 && TENSOR##_i < DIM) \
TENSOR##_dimOffset--; \
} else { \
--TH_TENSOR_dim_index; \
TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size(TENSOR##_i); \
TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride(TENSOR##_i); \
} \
} \
/* Size of the inner most section */ \
TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \
/* Stride of the inner most section */ \
TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \
} \
else{\
TENSOR##_dim = 1;\
TENSOR##_counter = (int64_t*)THAlloc(sizeof(int64_t)*3);\
TENSOR##_sizes = TENSOR##_counter + 1;\
TENSOR##_strides = TENSOR##_counter + 2;\
TENSOR##_sizes[0] = TENSOR##_n;\
TENSOR##_strides[0] = 1;\
TENSOR##_size = TENSOR##_sizes[0];\
TENSOR##_stride = TENSOR##_strides[0];\
}\
} \
TENSOR##_i = 0;
#define __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \
if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \
{ \
if(TENSOR##_contiguous) \
break; \
\
if(TENSOR##_dim == 1) \
break; \
\
/* Reset pointer to beginning of loop */ \
TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \
for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \
{ \
TENSOR##_counter[TENSOR##_i]++; \
/* Jump ahread by the stride of this dimension */ \
TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \
\
if(TENSOR##_counter[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]) \
{ \
if(TENSOR##_i == 0) \
{ \
TH_TENSOR_APPLY_hasFinished = 1; \
break; \
} \
else \
{ \
/* Reset the pointer to the beginning of the chunk defined by this dimension */ \
TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \
TENSOR##_counter[TENSOR##_i] = 0; \
} \
} \
else \
break; \
} \
TENSOR##_i = 0; \
} \
#define TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \
{ \
int TH_TENSOR_APPLY_hasFinished = 0; \
int64_t TH_TENSOR_dim_index = 0; \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \
\
int elements_equal = 1; \
if(TENSOR1##_n != TENSOR2##_n) { \
elements_equal = 0; \
} \
else if(TENSOR1##_n != TENSOR3##_n) { \
elements_equal = 0; \
} \
if (elements_equal == 0) { \
AT_ERROR("inconsistent tensor size, expected ", \
#TENSOR1, " ", TENSOR1->sizes(), ", ", \
#TENSOR2, " ", TENSOR2->sizes(), " and ", \
#TENSOR3, " ", TENSOR3->sizes(), " to have the same " \
"number of elements, but got ", TENSOR1##_n, ", ", \
TENSOR2##_n, " and ", TENSOR3##_n, " elements respectively"); \
} \
\
while(!TH_TENSOR_APPLY_hasFinished) \
{ \
/* Loop through the inner most region of the Tensor */ \
for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! */ \
{ \
CODE \
} \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \
} \
if(TENSOR1##_counter != NULL) \
THFree(TENSOR1##_counter); \
if(TENSOR2##_counter != NULL) \
THFree(TENSOR2##_counter); \
if(TENSOR3##_counter != NULL) \
THFree(TENSOR3##_counter); \
}
#define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE)
#define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \
{ \
int TH_TENSOR_APPLY_hasFinished = 0; \
int64_t TH_TENSOR_dim_index = 0; \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
\
if(TENSOR1##_n != TENSOR2##_n) { \
AT_ERROR("inconsistent tensor size, expected ", \
#TENSOR1, " ", TENSOR1->sizes(), " and ", \
#TENSOR2, " ", TENSOR2->sizes(), \
" to have the same number of elements, but got ", \
TENSOR1##_n, " and ", TENSOR2##_n, " elements respectively"); \
} \
while(!TH_TENSOR_APPLY_hasFinished) \
{ \
/* Loop through the inner most region of the Tensor */ \
for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \
{ \
CODE \
} \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
} \
if(TENSOR1##_counter != NULL) \
THFree(TENSOR1##_counter); \
if(TENSOR2##_counter != NULL) \
THFree(TENSOR2##_counter); \
}
#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE)
#define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \
{ \
int TH_TENSOR_APPLY_hasFinished = 0; \
int64_t TH_TENSOR_dim_index = 0; \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \
\
while(!TH_TENSOR_APPLY_hasFinished) \
{ \
/* Loop through the inner most region of the Tensor */ \
for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \
{ \
CODE \
} \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \
} \
THFree(TENSOR##_counter); \
}
#define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \
TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE)
/*
* Calcuate the memory offset of an element in a tensor. The strategy is below:
*
* 1. convert the line index(the index of the element) to the indexs(coordinates) in the tensor.
* It can hinted by a classical problem: Getting each individual digit from a whole integer(Decimal base).
* A N-digit decimal base number could be view as a N-dimension tensor and the sizes of the tensor are 10.
* So the value the whole integer is the line index. And the digits could be viewed as the indexes in
* different dimensions.
*
* 2. convert the indexs(coordinates) in the tensor to the memory offset.
*
* You can get the detailes in the for-statement iterations.
*
* The macro is only used in the first element in each thread. For the rest, the memory offset could update
* according to info of the tensor in order to get better performance. So we should also record the each
* indexs in coresponding dimension of first element.
* The recorded info is stored in the TENSOR##_counter_tmp.
*
*/
#define __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR) \
int64_t *TENSOR##_counter_tmp = (int64_t*)THAlloc(sizeof(int64_t) * TENSOR##_dim); \
ptrdiff_t TENSOR##_memory_offset = 0; \
ptrdiff_t TENSOR##_quot = line_index_start; \
for (TENSOR##_i = TENSOR##_dim-1; TENSOR##_i>=0; --TENSOR##_i) { \
TENSOR##_counter_tmp[TENSOR##_i] = TENSOR##_quot%TENSOR##_sizes[TENSOR##_i]; \
TENSOR##_quot /= TENSOR##_sizes[TENSOR##_i]; \
TENSOR##_memory_offset += TENSOR##_counter_tmp[TENSOR##_i] * TENSOR##_strides[TENSOR##_i]; \
}
/*
* The macro update the indexes in each dimension of the elements except for the first one allocated in
* each thread.
* For a tensor, if the index of some dimension reaches the size of the corresponding dimension. It will carry and clear.
* If the index of next high dimension does do, the index of next high dimension should carry and clear, too.
*
* The momery offset calculatation is a little confusing. If current index carries, the current index is set to 0. So
* the offset should decrease by size*stride of the last dimension. Then the index next high dimension increases by 1. So
* the offset should increase by stride of next high dimension.
*/
#define __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR) \
if(TENSOR##_i == TENSOR##_size && TENSOR##_dim > 1){ /*reaches the edge*/ \
int TENSOR##_carry_coord = 1; /*set carry flag to true*/ \
TENSOR##_start = 0; /*the current index be cleared to 0*/\
TENSOR##_data -= TENSOR##_size * TENSOR##_stride; /*the momery offset reset to the first one in current dimension */\
for(TENSOR##_i = TENSOR##_dim - 2; (TENSOR##_i >= 0) && (TENSOR##_carry_coord); TENSOR##_i--){ \
TENSOR##_counter_tmp[TENSOR##_i]++; /*the index of next high dimension update*/ \
TENSOR##_data += TENSOR##_strides[TENSOR##_i]; /*memory offset increase by stride of next high dimension*/\
if(TENSOR##_counter_tmp[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]){ /*The next high dimension also carry, continue
to clear and carry*/ \
TENSOR##_data -= TENSOR##_sizes[TENSOR##_i] * TENSOR##_strides[TENSOR##_i]; \
TENSOR##_counter_tmp[TENSOR##_i] = 0; \
} else { \
TENSOR##_carry_coord = 0; \
} \
} \
} else { \
TENSOR##_start = TENSOR##_i; \
}
#endif
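
The long comment at the top of the removed THTensorApply.h explains the apply strategy in prose: split the tensor into an outer, strided part and an inner, contiguous block, then iterate the block with a flat loop. A minimal standalone C++ sketch of that strategy (illustrative names and a plain function instead of the removed macros):

#include <cstdint>
#include <vector>

// Visit every element of a strided tensor, collapsing the trailing
// contiguous dimensions into one flat run (the "inner tensor B" from the
// removed comment) and walking the remaining outer dimensions with an
// odometer-style counter.
template <typename T, typename Fn>
void apply_strided(T* data,
                   const std::vector<int64_t>& sizes,
                   const std::vector<int64_t>& strides,
                   Fn fn) {
  const int64_t ndim = static_cast<int64_t>(sizes.size());
  for (int64_t s : sizes)
    if (s == 0) return;  // empty tensor: nothing to visit

  // 1. How many trailing dims form one contiguous block?
  int64_t block = 1;
  int64_t outer = ndim - 1;
  for (; outer >= 0; --outer) {
    if (strides[outer] != block) break;
    block *= sizes[outer];
  }

  // 2. Odometer over the outer dims; flat loop over the contiguous block.
  std::vector<int64_t> counter(outer + 1, 0);
  while (true) {
    int64_t offset = 0;
    for (int64_t d = 0; d <= outer; ++d) offset += counter[d] * strides[d];
    for (int64_t i = 0; i < block; ++i) fn(data[offset + i]);

    int64_t d = outer;
    for (; d >= 0; --d) {            // carry into the next outer dimension
      if (++counter[d] < sizes[d]) break;
      counter[d] = 0;
    }
    if (d < 0) break;                // all outer indices wrapped around: done
  }
}

The removed macros additionally merge adjacent outer dimensions that happen to be contiguous (step 3 of the comment), which this sketch leaves out.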

View File

@@ -1,329 +0,0 @@
#ifndef TH_TENSOR_DIM_APPLY_INC
#define TH_TENSOR_DIM_APPLY_INC
// This is an example of SIZE_CHECK argument passable to TH_TENSOR_DIM_APPLY3.
// The TENSOR1, TENSOR2, TENSOR3, DIMENSION will be expanded the same way as
// TH_TENSOR_DIM_APPLY3.
// Specifically, this check ensures that TENSOR1, TENSOR2, TENSOR3 have same
// size except for DIMENSION.
#define TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM(TENSOR1, TENSOR2, TENSOR3, DIMENSION) \
{ \
int shape_check_flag = 0; \
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
{ \
if (TH_TENSOR_DIM_APPLY_i == DIMENSION) \
continue; \
if (TENSOR1->size(TH_TENSOR_DIM_APPLY_i) != TENSOR2->size(TH_TENSOR_DIM_APPLY_i)) { \
shape_check_flag = 1; \
break; \
} \
if(TENSOR1->size(TH_TENSOR_DIM_APPLY_i) != TENSOR3->size(TH_TENSOR_DIM_APPLY_i)) { \
shape_check_flag = 1; \
break; \
} \
} \
if (shape_check_flag == 1) { \
AT_ERROR("Expected ", #TENSOR1, " ", TENSOR1->sizes(), ", ", #TENSOR2, " ", TENSOR2->sizes(), " and ", #TENSOR3, " ", TENSOR3->sizes(), " to have the same size apart from dimension ", DIMENSION); \
} \
}
#define TH_TENSOR_DIM_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIMENSION, SIZE_CHECK, CODE) \
{ \
TYPE1 *TENSOR1##_data = NULL; \
TH_UNUSED int64_t TENSOR1##_stride = 0, TENSOR1##_size = 0; \
TYPE2 *TENSOR2##_data = NULL; \
TH_UNUSED int64_t TENSOR2##_stride = 0, TENSOR2##_size = 0; \
TYPE3 *TENSOR3##_data = NULL; \
TH_UNUSED int64_t TENSOR3##_stride = 0, TENSOR3##_size = 0; \
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
int TH_TENSOR_DIM_APPLY_hasFinished = THTensor_(numel)(TENSOR1) == 0; \
int TH_TENSOR_DIM_APPLY_i; \
\
if( (DIMENSION < 0) || (DIMENSION >= THTensor_nDimensionLegacyNoScalars(TENSOR1)) ) \
THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, THTensor_nDimensionLegacyNoScalars(TENSOR1)); \
int same_dims = 1; \
if( THTensor_nDimensionLegacyNoScalars(TENSOR1) != THTensor_nDimensionLegacyNoScalars(TENSOR2) ) { \
same_dims = 0; \
} \
if( THTensor_nDimensionLegacyNoScalars(TENSOR1) != THTensor_nDimensionLegacyNoScalars(TENSOR3) ) { \
same_dims = 0; \
} \
if (same_dims == 0) { \
AT_ERROR("inconsistent tensor size, expected ", #TENSOR1, " ", TENSOR1->sizes(), ", ", #TENSOR2, " ", TENSOR2->sizes(), " and ", #TENSOR3, " ",TENSOR3->sizes() , " to have the same number of dimensions"); \
} \
SIZE_CHECK(TENSOR1, TENSOR2, TENSOR3, DIMENSION) \
\
if (TH_TENSOR_DIM_APPLY_hasFinished) { \
return; \
} \
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(THTensor_nDimensionLegacyNoScalars(TENSOR1))); \
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
\
TENSOR1##_data = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+(TENSOR1)->storage_offset(); \
TENSOR1##_stride = THTensor_strideLegacyNoScalars((TENSOR1), DIMENSION); \
TENSOR1##_size = THTensor_sizeLegacyNoScalars((TENSOR1), DIMENSION); \
\
TENSOR2##_data = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+(TENSOR2)->storage_offset(); \
TENSOR2##_stride = THTensor_strideLegacyNoScalars((TENSOR2), DIMENSION); \
TENSOR2##_size = THTensor_sizeLegacyNoScalars((TENSOR2), DIMENSION); \
\
TENSOR3##_data = THTensor_getStoragePtr(TENSOR3)->data<TYPE3>()+(TENSOR3)->storage_offset(); \
TENSOR3##_stride = THTensor_strideLegacyNoScalars((TENSOR3), DIMENSION); \
TENSOR3##_size = THTensor_sizeLegacyNoScalars((TENSOR3), DIMENSION); \
\
while(!TH_TENSOR_DIM_APPLY_hasFinished) \
{ \
CODE \
\
if(THTensor_nDimensionLegacyNoScalars(TENSOR1) == 1) \
break; \
\
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
{ \
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
{ \
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
{ \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
continue; \
} \
\
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
TENSOR1##_data += THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
TENSOR2##_data += THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
TENSOR3##_data += THTensor_strideLegacyNoScalars(TENSOR3, TH_TENSOR_DIM_APPLY_i); \
\
if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i)) \
{ \
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
{ \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
else \
{ \
TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR3, TH_TENSOR_DIM_APPLY_i); \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
} \
else \
break; \
} \
} \
THFree(TH_TENSOR_DIM_APPLY_counter); \
}
/**
* Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor
* and one for the second. The two tensors must have the same shape, other than at the
* specified DIMENSION. This function makes it easy to store the output from reducing the
* TENSOR at index. For example, in the sum example described below, we could instead do:
*
* int64_t i = 0;
* TYPE1 sum;
*
* for (i = 0; i < TENSOR1##_size; ++i) {
* sum += TENSOR1##_data[i * TENSOR1##_stride]
* }
* *TENSOR2##_data = (TYPE2) sum;
*
* In particular, we guarantee that the offset into TENSOR2 will be what you would get if
* you applied all of the index values used to generate the offset into TENSOR1.
*/
#define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \
{ \
TYPE1 *TENSOR1##_data = NULL; \
TH_UNUSED int64_t TENSOR1##_stride = 0, TENSOR1##_size = 0; \
TYPE2 *TENSOR2##_data = NULL; \
TH_UNUSED int64_t TENSOR2##_stride = 0, TENSOR2##_size = 0; \
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
int TH_TENSOR_DIM_APPLY_hasFinished = THTensor_(numel)(TENSOR1) == 0; \
int TH_TENSOR_DIM_APPLY_i; \
\
if( (DIMENSION < 0) || (DIMENSION >= THTensor_nDimensionLegacyNoScalars(TENSOR1)) ) \
THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, THTensor_nDimensionLegacyAll(TENSOR1)); \
if( THTensor_nDimensionLegacyNoScalars(TENSOR1) != THTensor_nDimensionLegacyNoScalars(TENSOR2)) { \
AT_ERROR("inconsistent tensor size, expected ", #TENSOR1, " ", TENSOR1->sizes(), " and ", #TENSOR2, " ", TENSOR2->sizes(), " to have the same number of dimensions"); \
} \
TH_UNUSED int shape_check_flag = 0; \
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
{ \
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
continue; \
if(THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i) != THTensor_sizeLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i)) { \
AT_ERROR("Expected ", #TENSOR1, " ", TENSOR1->sizes(), " and ", #TENSOR2, " ", TENSOR2->sizes(), " to have the same size in dimension ", DIMENSION); \
} \
} \
\
if (TH_TENSOR_DIM_APPLY_hasFinished) { \
return; \
} \
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(THTensor_nDimensionLegacyNoScalars(TENSOR1))); \
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
\
TENSOR1##_data = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+(TENSOR1)->storage_offset(); \
TENSOR1##_stride = THTensor_strideLegacyNoScalars((TENSOR1), DIMENSION); \
TENSOR1##_size = THTensor_sizeLegacyNoScalars(TENSOR1, DIMENSION); \
\
TENSOR2##_data = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+(TENSOR2)->storage_offset(); \
TENSOR2##_stride = THTensor_strideLegacyNoScalars((TENSOR2), DIMENSION); \
TENSOR2##_size = THTensor_sizeLegacyNoScalars(TENSOR2, DIMENSION); \
\
while(!TH_TENSOR_DIM_APPLY_hasFinished) \
{ \
CODE \
\
if(THTensor_nDimensionLegacyNoScalars(TENSOR1) == 1) \
break; \
\
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
{ \
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
{ \
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
{ \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
continue; \
} \
\
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
TENSOR1##_data += THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
TENSOR2##_data += THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
\
if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i)) \
{ \
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
{ \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
else \
{ \
TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
} \
else \
break; \
} \
} \
THFree(TH_TENSOR_DIM_APPLY_counter); \
}
/**
* The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored
* at all sets of dimension values other than DIMENSION, such that we can get all the values at those
* fixed indices for the various values at DIMENSION.
*
* Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the
* pointer into storage will be at:
*
* A[0][0]
* A[0][1]
* A[0][2]
* A[1][0]
* A[1][1]
* A[1][2]
*
* And at each point, we can access the data for each of the four elements of the Tensor via
* TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do:
*
* int64_t i = 0;
* TYPE sum;
* for (i = 0; i < TENSOR##_size; i++) {
* sum += TENSOR##_data[i * TENSOR##_stride]
* }
*
* Note that we don't have to have DIMENSION be the last tensor. If we have DIMENSION=1, then we will hit the
* code (2x4) times, with pointer into the storage at:
*
* offset +
* stride_0 * 0 + stride_2 * 0
* stride_0 * 1 + stride_2 * 0
* stride_0 * 0 + stride_2 * 1
* stride_0 * 1 + stride_2 * 1
* stride_0 * 0 + stride_2 * 2
* stride_0 * 1 + stride_2 * 2
* stride_0 * 0 + stride_2 * 3
* stride_0 * 1 + stride_2 * 3
*
* So we can again sum over the values at DIMENSION with the other indices fixed.
*/
#define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \
{ \
TYPE *TENSOR##_data = NULL; \
int64_t TENSOR##_stride = 0, TENSOR##_size = 0; \
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
int TH_TENSOR_DIM_APPLY_i; \
\
if( (DIMENSION < 0) || (DIMENSION >= THTensor_nDimensionLegacyAll(TENSOR)) ) \
THError("invalid dimension"); \
\
TENSOR##_data = THTensor_getStoragePtr(TENSOR)->data<TYPE>()+(TENSOR)->storage_offset(); \
TENSOR##_stride = THTensor_strideLegacyNoScalars((TENSOR), DIMENSION); \
TENSOR##_size = THTensor_sizeLegacyNoScalars(TENSOR, DIMENSION); \
/* Counter stores the indices into the Tensor at any time */ \
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(THTensor_nDimensionLegacyAll(TENSOR))); \
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyAll(TENSOR); TH_TENSOR_DIM_APPLY_i++) \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
\
while(!TH_TENSOR_DIM_APPLY_hasFinished) \
{ \
CODE \
\
if(THTensor_nDimensionLegacyAll(TENSOR) == 1) \
break; \
\
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyAll(TENSOR); TH_TENSOR_DIM_APPLY_i++) \
{ \
/* Check if the index is equal to DIMENSION. We don't need to update the */ \
/* offset if this is the case, and can consider the next index. However, */ \
/* in the case that the DIMENSION is the last index in the Tensor, then */ \
/* we have parsed the entire tensor and can exit */ \
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
{ \
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyAll(TENSOR)-1) \
{ \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
continue; \
} \
\
/* Bump the counter at this index, update the pointer */ \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
TENSOR##_data += THTensor_strideLegacyNoScalars(TENSOR, TH_TENSOR_DIM_APPLY_i); \
\
if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == THTensor_sizeLegacyNoScalars(TENSOR, TH_TENSOR_DIM_APPLY_i)) \
{ \
/* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. If this is the last dimension, exit */ \
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyAll(TENSOR)-1) \
{ \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
else \
{ \
/* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \
TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR, TH_TENSOR_DIM_APPLY_i); \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
} \
else \
break; \
} \
} \
THFree(TH_TENSOR_DIM_APPLY_counter); \
}
#endif
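
The doc comments in the removed THTensorDimApply.h describe the DIM_APPLY contract: for every combination of indices in the dimensions other than DIMENSION, expose a base pointer plus the size and stride along DIMENSION so the CODE block can, for example, reduce along it. A standalone C++ sketch of that contract, using the sum example from the removed comment (illustrative names, single element type, no error checking):

#include <cstdint>
#include <vector>

// Sum `in` along `dim` into `out`, where `out` has size 1 along `dim` and
// matches `in` in every other dimension (as the removed macro required).
void sum_along_dim(const float* in, float* out,
                   const std::vector<int64_t>& sizes,
                   const std::vector<int64_t>& in_strides,
                   const std::vector<int64_t>& out_strides,
                   int64_t dim) {
  const int64_t ndim = static_cast<int64_t>(sizes.size());
  for (int64_t s : sizes)
    if (s == 0) return;  // keep the sketch simple for empty tensors

  std::vector<int64_t> counter(ndim, 0);  // indices of all dims except `dim`
  while (true) {
    // Base offsets for the current combination of non-`dim` indices.
    int64_t in_off = 0, out_off = 0;
    for (int64_t d = 0; d < ndim; ++d) {
      if (d == dim) continue;
      in_off += counter[d] * in_strides[d];
      out_off += counter[d] * out_strides[d];
    }

    // This plays the role of the CODE block: reduce along `dim`.
    float sum = 0.0f;
    for (int64_t i = 0; i < sizes[dim]; ++i)
      sum += in[in_off + i * in_strides[dim]];
    out[out_off] = sum;

    // Advance the counter over the non-`dim` dimensions.
    int64_t d = ndim - 1;
    for (; d >= 0; --d) {
      if (d == dim) continue;
      if (++counter[d] < sizes[d]) break;
      counter[d] = 0;
    }
    if (d < 0) break;
  }
}

TH_TENSOR_DIM_APPLY3 extended the same walk to a third tensor whose sizes matched outside DIMENSION.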

View File

@@ -1,20 +0,0 @@
#include <TH/THTensor.hpp>
#include <TH/THVector.h>
#include <TH/THBlas.h>
#include <TH/THTensorDimApply.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateAllTypes.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateBoolType.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateBFloat16Type.h>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateHalfType.h>

View File

@@ -1,24 +0,0 @@
#ifndef TH_VECTOR_INC
#define TH_VECTOR_INC
#include <TH/THGeneral.h>
#define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME)
/* We are going to use dynamic dispatch, and want only to generate declarations
* of the vector functions */
#include <TH/generic/THVector.h>
#include <TH/THGenerateAllTypes.h>
#include <TH/generic/THVector.h>
#include <TH/THGenerateHalfType.h>
#include <TH/generic/THVector.h>
#include <TH/THGenerateBoolType.h>
#include <TH/generic/THVector.h>
#include <TH/THGenerateBFloat16Type.h>
#include <TH/generic/THVector.h>
#include <TH/THGenerateComplexTypes.h>
#endif // TH_VECTOR_INC

View File

@@ -1,48 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THBlas.cpp"
#else
#ifdef BLAS_F2C
# define ffloat double
#else
# define ffloat float
#endif
TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy);
TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy);
void THBlas_(swap)(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy)
{
if(n == 1)
{
incx = 1;
incy = 1;
}
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
#if defined(TH_REAL_IS_DOUBLE)
dswap_(&i_n, x, &i_incx, y, &i_incy);
#else
sswap_(&i_n, x, &i_incx, y, &i_incy);
#endif
return;
}
#endif
{
int64_t i;
for(i = 0; i < n; i++)
{
scalar_t z = x[i*incx];
x[i*incx] = y[i*incy];
y[i*incy] = z;
}
}
}
#endif
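
The removed THBlas_(swap) shows the pattern TH used for its BLAS wrappers: call the Fortran routine when n and the increments fit in an int, otherwise fall back to a scalar loop. The same pattern expressed against the standard CBLAS interface (a sketch that assumes a CBLAS-providing BLAS is linked; not an API introduced by this commit):

#include <cblas.h>
#include <climits>
#include <cstdint>

void swap_double(int64_t n, double* x, int64_t incx, double* y, int64_t incy) {
  if (n <= INT_MAX && incx <= INT_MAX && incy <= INT_MAX) {
    cblas_dswap(static_cast<int>(n), x, static_cast<int>(incx),
                y, static_cast<int>(incy));
    return;
  }
  // Fallback mirrors the removed scalar loop for sizes/strides beyond int range.
  for (int64_t i = 0; i < n; i++) {
    double z = x[i * incx];
    x[i * incx] = y[i * incy];
    y[i * incy] = z;
  }
}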

View File

@@ -1,8 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THBlas.h"
#else
/* Level 1 */
TH_API void THBlas_(swap)(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy);
#endif

View File

@@ -8,50 +8,6 @@
#include <ATen/NamedTensorUtils.h>
#include <ATen/MemoryOverlap.h>
/**** access methods ****/
THStorage *THTensor_(storage)(const THTensor *self)
{
return THTensor_getStoragePtr(self);
}
ptrdiff_t THTensor_(storageOffset)(const THTensor *self)
{
return self->storage_offset();
}
int THTensor_(nDimension)(const THTensor *self)
{
return THTensor_nDimension(self);
}
int THTensor_(nDimensionLegacyNoScalars)(const THTensor *self)
{
return THTensor_nDimensionLegacyNoScalars(self);
}
int THTensor_(nDimensionLegacyAll)(const THTensor *self)
{
return THTensor_nDimensionLegacyAll(self);
}
int64_t THTensor_(size)(const THTensor *self, int dim)
{
THArgCheck((dim >= 0) && (dim < self->dim()), 2, "dimension %d out of range of %dD tensor",
dim, THTensor_(nDimensionLegacyNoScalars)(self));
return self->size(dim);
}
int64_t THTensor_(stride)(const THTensor *self, int dim)
{
THArgCheck((dim >= 0) && (dim < self->dim()), 2, "dimension %d out of range of %dD tensor",
dim, THTensor_(nDimensionLegacyNoScalars)(self));
return self->stride(dim);
}
scalar_t *THTensor_(data)(const THTensor *self) {
return self->data<scalar_t>();
}
/**** creation methods ****/
/* Empty init */
@@ -69,12 +25,6 @@ THTensor *THTensor_(new)(void)
.release();
}
/* Pointer-copy init */
THTensor *THTensor_(newWithTensor)(THTensor *tensor)
{
return at::native::alias(THTensor_wrap(tensor)).unsafeReleaseTensorImpl();
}
THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset,
int64_t size0, int64_t stride0)
{
@@ -94,442 +44,14 @@ THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffse
return self;
}
THTensor *THTensor_(newWithSize1d)(int64_t size0)
{
THStorage *new_storage = THStorage_(new)();
THTensor* self =
c10::make_intrusive<at::TensorImpl, at::UndefinedTensorImpl>(
c10::intrusive_ptr<at::StorageImpl>::reclaim(new_storage),
at::DispatchKey::CPU,
caffe2::TypeMeta::Make<scalar_t>())
.release();
THTensor_(setStorage)(self, new_storage, 0, {size0}, {});
return self;
}
THTensor *THTensor_(newClone)(THTensor *self)
{
// already available in Aten as at::clone()
THTensor *tensor = THTensor_(new)();
at::Tensor tensor_wrap = THTensor_wrap(tensor);
at::Tensor self_wrap = THTensor_wrap(self);
tensor_wrap.resize_as_(self_wrap);
at::native::copy_(tensor_wrap, self_wrap, false);
return tensor;
}
THTensor *THTensor_(newContiguous)(THTensor *self)
{
if(!THTensor_(isContiguous)(self))
return THTensor_(newClone)(self);
else
{
THTensor_(retain)(self);
return self;
}
}
THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, int64_t sliceIndex_)
{
THTensor *self = THTensor_(newWithTensor)(tensor);
THTensor_(select)(self, NULL, dimension_, sliceIndex_);
return self;
}
THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, int64_t firstIndex_, int64_t size_)
{
THTensor *self = THTensor_(newWithTensor)(tensor);
THTensor_(narrow)(self, NULL, dimension_, firstIndex_, size_);
return self;
}
THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_)
{
THTensor *self = THTensor_(newWithTensor)(tensor);
THTensor_(transpose)(self, NULL, dimension1_, dimension2_);
return self;
}
/* Resize */
void THTensor_(resize)(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride)
{
return THTensor_resize(self, size, stride);
}
void THTensor_(resizeAs)(THTensor *self, THTensor *src)
{
// already available in Aten as at::resize_as_()
if(!THTensor_(isSameSizeAs)(self, src))
THTensor_(resizeNd)(self, src->dim(), THTensor_getSizePtr(src), NULL);
}
void THTensor_(resize0d)(THTensor *tensor)
{
THTensor_(resizeNd)(tensor, 0, {}, nullptr);
}
void THTensor_(resize1d)(THTensor *tensor, int64_t size0)
{
int64_t size[1] = {size0};
THTensor_(resizeNd)(tensor, 1, size, nullptr);
}
void THTensor_(resize2d)(THTensor *tensor, int64_t size0, int64_t size1)
{
int64_t size[2] = {size0, size1};
THTensor_(resizeNd)(tensor, 2, size, nullptr);
}
void THTensor_(resize3d)(THTensor *tensor, int64_t size0, int64_t size1, int64_t size2)
{
int64_t size[3] = {size0, size1, size2};
THTensor_(resizeNd)(tensor, 3, size, nullptr);
}
void THTensor_(resize4d)(THTensor *self, int64_t size0, int64_t size1, int64_t size2, int64_t size3)
{
int64_t size[4] = {size0, size1, size2, size3};
THTensor_(resizeNd)(self, 4, size, nullptr);
}
void THTensor_(resize5d)(THTensor *self, int64_t size0, int64_t size1, int64_t size2, int64_t size3, int64_t size4)
{
int64_t size[5] = {size0, size1, size2, size3, size4};
THTensor_(resizeNd)(self, 5, size, nullptr);
}
void THTensor_(set)(THTensor *self, THTensor *src)
{
if(self != src)
THTensor_(setStorage)(self,
THTensor_getStoragePtr(src),
src->storage_offset(),
src->sizes(),
src->strides());
}
void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_)
{
THTensor_setStorage(self, storage_, storageOffset_, size_, stride_);
}
void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension, int64_t firstIndex, int64_t size)
{
if(!src)
src = self;
THArgCheck( (dimension >= 0) && (dimension < src->dim()), 2, "out of range");
THArgCheck( firstIndex >= 0, 3, "out of range");
THArgCheck( size >= 0, 4, "out of range");
THArgCheck(firstIndex <= src->size(dimension) - size, 4, "out of range");
THTensor_(set)(self, src);
if (firstIndex > 0) {
self->set_storage_offset(self->storage_offset() + firstIndex*self->stride(dimension));
}
self->set_size(dimension, size);
}
void THTensor_(select)(THTensor *self, THTensor *src, int dimension, int64_t sliceIndex)
{
int d;
if(!src)
src = self;
THArgCheck(src->dim() > 0, 1, "cannot select on a 0-dim tensor");
THArgCheck((dimension >= 0) && (dimension < src->dim()), 2, "out of range");
THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size(dimension)), 3, "out of range");
THTensor_(set)(self, src);
THTensor_(narrow)(self, NULL, dimension, sliceIndex, 1);
at::DimVector newSize(static_cast<size_t>(self->dim()-1));
at::DimVector newStride(static_cast<size_t>(self->dim()-1));
for (d = 0; d < dimension; d++)
{
newSize[d] = self->size(d);
newStride[d] = self->stride(d);
}
for(d = dimension; d < self->dim()-1; d++)
{
newSize[d] = self->size(d+1);
newStride[d] = self->stride(d+1);
}
self->set_sizes_and_strides(newSize, newStride);
}
void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1, int dimension2)
{
int64_t z;
if(!src)
src = self;
THArgCheck( (dimension1 >= 0) && (dimension1 < THTensor_nDimensionLegacyNoScalars(src)), 1, "out of range");
THArgCheck( (dimension2 >= 0) && (dimension2 < THTensor_nDimensionLegacyNoScalars(src)), 2, "out of range");
THTensor_(set)(self, src);
if(dimension1 == dimension2)
return;
z = self->stride(dimension1);
self->set_stride(dimension1, self->stride(dimension2));
self->set_stride(dimension2, z);
z = self->size(dimension1);
self->set_size(dimension1, self->size(dimension2));
self->set_size(dimension2, z);
}
void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension)
{
int d;
if(!src)
src = self;
THArgCheck((dimension >= 0) && (dimension < src->dim()), 2, "dimension out of range");
THTensor_(set)(self, src);
if(src->size(dimension) == 1)
{
at::DimVector newSize(static_cast<size_t>(self->dim() - 1));
at::DimVector newStride(static_cast<size_t>(self->dim() - 1));
for (d = 0; d < dimension; d++)
{
newSize[d] = self->size(d);
newStride[d] = self->stride(d);
}
for(d = dimension; d < self->dim()-1; d++)
{
newSize[d] = self->size(d+1);
newStride[d] = self->stride(d+1);
}
self->set_sizes_and_strides(newSize, newStride);
}
}
void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension)
{
int d;
if(!src)
src = self;
THArgCheck((dimension >= 0) && (dimension <= src->dim()), 2, "dimension out of range");
THTensor_(set)(self, src);
at::DimVector newSize(static_cast<size_t>(/* size */ self->dim()+1));
at::DimVector newStride(static_cast<size_t>(/* size */ self->dim()+1));
for(d = self->dim(); d > dimension; d--)
{
newSize[d] = self->size(d-1);
newStride[d] = self->stride(d-1);
}
if (dimension < self->dim())
{
newStride[dimension] = self->size(dimension) * self->stride(dimension);
}
else
{
newStride[dimension] = 1;
}
newSize[dimension] = 1;
for(d = dimension - 1; d >= 0; d--)
{
newSize[d] = self->size(d);
newStride[d] = self->stride(d);
}
self->set_sizes_and_strides(newSize, newStride);
}
int THTensor_(isTransposed)(const THTensor *self)
{
if (THTensor_(isContiguous)(self)) {
return 0;
}
int64_t max_stride = 1;
int64_t size_max_stride = 1;
int64_t z = 1;
int d;
for (d = 0; d < self->dim(); ++d) {
if (self->stride(d) == 0 && self->size(d) != 1)
return 0;
if (self->stride(d) > max_stride) {
max_stride = self->stride(d);
size_max_stride = self->size(d);
}
z *= self->size(d);
}
if (z == max_stride * size_max_stride) {
return 1;
}
return 0;
}
int THTensor_(isContiguous)(const THTensor *self)
{
return self->is_contiguous();
}
int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor* src)
{
int d;
if (self->dim() != src->dim())
return 0;
for(d = 0; d < self->dim(); ++d)
{
if(self->size(d) != src->size(d))
return 0;
}
return 1;
}
ptrdiff_t THTensor_(nElement)(const THTensor *self)
{
if(THTensor_nDimensionLegacyAll(self) == 0)
return 0;
else
{
ptrdiff_t nElement = 1;
int d;
for(d = 0; d < THTensor_nDimension(self); d++)
nElement *= self->size(d);
return nElement;
}
}
// NB: It is INVALID to call this on an UndefinedTensorImpl
void THTensor_(retain)(THTensor *self)
{
c10::raw::intrusive_ptr::incref(self);
}
void THTensor_(free)(THTensor *self)
{
THTensor_free(self);
}
void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst)
{
if(self != dst) {
at::Tensor dst_wrap = THTensor_wrap(dst);
at::Tensor self_wrap = THTensor_wrap(self);
at::native::copy_(dst_wrap, self_wrap, false);
}
THTensor_(free)(self);
}
/*******************************************************************************/
void THTensor_(resizeNd)(THTensor *self, int nDimension, const int64_t *size, const int64_t *stride)
{
return THTensor_resizeNd(self, nDimension, size, stride);
}
void THTensor_(set0d)(THTensor *tensor, scalar_t value)
{
THArgCheck(THTensor_nDimension(tensor) == 0, 1, "tensor must have no dimensions");
THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset(), value);
}
scalar_t THTensor_(get0d)(const THTensor *tensor)
{
THArgCheck(THTensor_nDimension(tensor) == 0, 1, "tensor must have no dimensions");
return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset());
}
void THTensor_(set1d)(THTensor *tensor, int64_t x0, scalar_t value)
{
THArgCheck(THTensor_nDimensionLegacyNoScalars(tensor) == 1, 1, "tensor must have one dimension");
THArgCheck( (x0 >= 0) && (x0 < THTensor_sizeLegacyNoScalars(tensor, 0)), 2, "out of range");
THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*THTensor_strideLegacyNoScalars(tensor, 0), value);
}
scalar_t THTensor_(get1d)(const THTensor *tensor, int64_t x0)
{
THArgCheck(THTensor_nDimensionLegacyNoScalars(tensor) == 1, 1, "tensor must have one dimension");
THArgCheck( (x0 >= 0) && (x0 < THTensor_sizeLegacyNoScalars(tensor, 0)), 2, "out of range");
return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*THTensor_strideLegacyNoScalars(tensor, 0));
}
void THTensor_(set2d)(THTensor *tensor, int64_t x0, int64_t x1, scalar_t value)
{
THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 2, 1, "tensor must have two dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)), 2, "out of range");
THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1), value);
}
scalar_t THTensor_(get2d)(const THTensor *tensor, int64_t x0, int64_t x1)
{
THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 2, 1, "tensor must have two dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)), 2, "out of range");
return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1));
}
void THTensor_(set3d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, scalar_t value)
{
THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 3, 1, "tensor must have three dimensions");
THArgCheck( (x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)), 2, "out of range");
THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2), value);
}
scalar_t THTensor_(get3d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2)
{
THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 3, 1, "tensor must have three dimensions");
THArgCheck( (x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)), 2, "out of range");
return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2));
}
void THTensor_(set4d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value)
{
THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 4, 1, "tensor must have four dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)) && (x3 >= 0) && (x3 < tensor->size(3)), 2, "out of range");
THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2)+x3*tensor->stride(3), value);
}
scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3)
{
THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 4, 1, "tensor must have four dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)) && (x3 >= 0) && (x3 < tensor->size(3)), 2, "out of range");
return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2)+x3*tensor->stride(3));
}
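/* A minimal standalone sketch (not part of the TH sources) of the indexing
   arithmetic shared by the slow get/set accessors above: an N-d element lives at
   storage_offset + sum over d of x_d * stride_d in the underlying storage.
   element_offset is an illustrative name, not a TH API. */
#include <cstdint>
#include <vector>

static inline int64_t element_offset(int64_t storage_offset,
                                     const std::vector<int64_t>& index,
                                     const std::vector<int64_t>& strides) {
  int64_t offset = storage_offset;
  for (size_t d = 0; d < index.size(); ++d) {
    offset += index[d] * strides[d];  // same term as x0*stride(0), x1*stride(1), ...
  }
  return offset;
}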
THDescBuff THTensor_(desc)(const THTensor *tensor) {
const int L = TH_DESC_BUFF_LEN;
THDescBuff buf;
char *str = buf.str;
int n = 0;
#define _stringify(x) #x
n += snprintf(str, L-n, "torch." _stringify(x) "Tensor of size ");
#undef _stringify
int i;
for(i = 0; i < THTensor_nDimension(tensor); i++) {
if(n >= L) break;
n += snprintf(str+n, L-n, "%" PRId64, tensor->size(i));
if(i < THTensor_nDimension(tensor)-1) {
n += snprintf(str+n, L-n, "x");
}
}
if(n >= L) {
snprintf(str+L-4, 4, "...");
}
return buf;
}
THDescBuff THTensor_(sizeDesc)(const THTensor *tensor) {
THDescBuff buf = _THSizeDesc(tensor->sizes().data(), tensor->sizes().size());
return buf;
}
#endif

View File

@ -55,24 +55,12 @@ TH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int
// This is especially likely to happen when the tensor is not contiguous. In general, if you still need the
// values, unless you are doing some size and stride tricks, do not use resize*.
TH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, const int64_t *size, const int64_t *stride);
TH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src);
TH_API void THTensor_(resize0d)(THTensor *tensor);
TH_API void THTensor_(resize1d)(THTensor *tensor, int64_t size0_);
TH_API void THTensor_(resize2d)(THTensor *tensor, int64_t size0_, int64_t size1_);
TH_API void THTensor_(resize3d)(THTensor *tensor, int64_t size0_, int64_t size1_, int64_t size2_);
TH_API void THTensor_(resize4d)(THTensor *tensor, int64_t size0_, int64_t size1_, int64_t size2_, int64_t size3_);
TH_API void THTensor_(resize5d)(THTensor *tensor, int64_t size0_, int64_t size1_, int64_t size2_, int64_t size3_, int64_t size4_);
// Note: these are legacy resize functions that treat sizes as size->size() == 0 and size->data<int64_t>() as being 0-terminated.
TH_API void THTensor_(set)(THTensor *self, THTensor *src);
TH_API void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension_, int64_t firstIndex_, int64_t size_);
TH_API void THTensor_(select)(THTensor *self, THTensor *src, int dimension_, int64_t sliceIndex_);
TH_API void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1_, int dimension2_);
TH_API int THTensor_(isTransposed)(const THTensor *self);
TH_API void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension_);
TH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_);
TH_API int THTensor_(isContiguous)(const THTensor *self);
TH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src);
@ -80,23 +68,5 @@ TH_API ptrdiff_t THTensor_(nElement)(const THTensor *self);
TH_API void THTensor_(retain)(THTensor *self);
TH_API void THTensor_(free)(THTensor *self);
TH_API void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst);
/* Slow access methods [check everything] */
TH_API void THTensor_(set0d)(THTensor *tensor, scalar_t value);
TH_API void THTensor_(set1d)(THTensor *tensor, int64_t x0, scalar_t value);
TH_API void THTensor_(set2d)(THTensor *tensor, int64_t x0, int64_t x1, scalar_t value);
TH_API void THTensor_(set3d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, scalar_t value);
TH_API void THTensor_(set4d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value);
TH_API scalar_t THTensor_(get0d)(const THTensor *tensor);
TH_API scalar_t THTensor_(get1d)(const THTensor *tensor, int64_t x0);
TH_API scalar_t THTensor_(get2d)(const THTensor *tensor, int64_t x0, int64_t x1);
TH_API scalar_t THTensor_(get3d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2);
TH_API scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3);
/* Debug methods */
TH_API THDescBuff THTensor_(desc)(const THTensor *tensor);
TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor);
#endif

View File

@ -11,6 +11,4 @@
TH_CPP_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
at::IntArrayRef size_, at::IntArrayRef stride_);
TH_CPP_API void THTensor_(resize)(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride);
#endif

View File

@ -1,369 +0,0 @@
#include <TH/THTensorApply.h>
#ifndef NAN
#define NAN (nan(NULL))
#endif
#define HYPER_TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE / 16)
#define ORDIN_TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE / 4)
#define UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE / 2)
#define TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE)
#define TH_CHECK_SAME_SIZE(TENSOR1, TENSOR2) \
{ \
if (!THTensor_(isSameSizeAs)(TENSOR1, TENSOR2)) { \
AT_ERROR("inconsistent tensor size, expected ", #TENSOR1, " ", TENSOR1->sizes(), " and ", #TENSOR2, " ", TENSOR2->sizes(), " to have the same size"); \
} \
}
// Used for `scatter` and `scatterAdd`
// Assumes TENSOR1 is index
// TENSOR2 is real
// TENSOR3 is src
// Tests:
// 1. index->size(d) <= src->size(d) for all d
// 2. index->size(d) <= real->size(d) for all d != dim
#define TH_TENSOR_DIM_APPLY3_SIZE_SCATTER(TENSOR1, TENSOR2, TENSOR3, DIMENSION) \
{ \
int shape_check_flag = 0; \
for (TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyAll(TENSOR2); TH_TENSOR_DIM_APPLY_i++) \
{ \
int64_t TENSOR1##_dim_size = THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
if (TH_TENSOR_DIM_APPLY_i != DIMENSION) { \
if (TENSOR1##_dim_size > THTensor_sizeLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i)) { \
shape_check_flag = 1; \
break; \
} \
} \
if (TENSOR1##_dim_size > THTensor_sizeLegacyNoScalars(TENSOR3, TH_TENSOR_DIM_APPLY_i)) { \
shape_check_flag = 1; \
break; \
} \
} \
if (shape_check_flag == 1) { \
AT_ERROR("Expected ", #TENSOR1, " ", TENSOR1->sizes(), " to be smaller size than ", #TENSOR3, " ", TENSOR3->sizes(), " and to be smaller than ", #TENSOR2, " ", TENSOR2->sizes(), " apart from dimension ", DIMENSION); \
} \
}
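/* A minimal standalone sketch (not part of the TH sources) of the two shape checks
   the macro above performs for scatter/scatterAdd, assuming plain std::vector<int64_t>
   shapes. check_scatter_shapes is an illustrative name, not a TH API.
     1. index.size(d) <= src.size(d)  for every d
     2. index.size(d) <= real.size(d) for every d != dim */
#include <cstdint>
#include <stdexcept>
#include <vector>

static void check_scatter_shapes(const std::vector<int64_t>& index,
                                 const std::vector<int64_t>& real,
                                 const std::vector<int64_t>& src,
                                 size_t dim) {
  for (size_t d = 0; d < real.size(); ++d) {
    if (d != dim && index[d] > real[d]) {
      throw std::runtime_error("index must not be larger than the destination outside of dim");
    }
    if (index[d] > src[d]) {
      throw std::runtime_error("index must not be larger than src in any dimension");
    }
  }
}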
#undef th_isnan
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#define th_isnan(val) \
(std::isnan(val))
#else
#define th_isnan(val) (0)
#endif
#undef th_isnan_break
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#define th_isnan_break(val) \
if (std::isnan(val)) break;
#else
#define th_isnan_break(val)
#endif
#if defined(__clang__)
#define PRAGMA(P) _Pragma(#P)
#define PRAGMA_IVDEP // Noop
#define PRAGMA_SIMD // Noop
#elif defined(_MSC_VER)
#define PRAGMA(P) __pragma(P)
# if _MSC_VER < 1920
// MSVC < 2019 doesn't support loop pragmas.
# define PRAGMA_IVDEP // Noop
# define PRAGMA_SIMD // Noop
# else
# define PRAGMA_IVDEP PRAGMA(loop(ivdep))
# define PRAGMA_SIMD PRAGMA(omp simd)
# endif
#else
#define PRAGMA(P) _Pragma(#P)
#define PRAGMA_IVDEP PRAGMA(ivdep)
#define PRAGMA_SIMD PRAGMA(simd)
#endif
#define TH_TENSOR_APPLY2_PARALLEL(SIZE, CONTIG1, CONTIG2, TYPE1, TENSOR1, TYPE2, TENSOR2, CODE, THRESHOLD) \
{ \
/* for advanced searching index*/ \
if (CONTIG1 && CONTIG2) { \
TYPE1 *rp = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+TENSOR1->storage_offset(); \
TYPE2 *tp = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+TENSOR2->storage_offset(); \
if (tp != (TYPE2*)rp) { \
at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
PRAGMA_IVDEP \
for (auto iter = begin; iter < end; iter++) { \
TYPE2 *TENSOR2##_data = tp+iter; \
TYPE1 *TENSOR1##_data = rp+iter; \
CODE \
} \
}); \
} else { \
at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
PRAGMA_SIMD \
for (auto iter = begin; iter < end; iter++) { \
TYPE2* TENSOR2##_data = tp+iter; \
TYPE1* TENSOR1##_data = rp+iter; \
CODE \
} \
}); \
} \
} else { \
/* The following strategy is not obvious, so here is an outline:
* 1. Collapse the tensors' dimensions to reduce the number of nested loops.
* 2. Compute the number of elements assigned to each thread and the linear index of its first element.
* 3. Compute the memory offset of that first element and its index in each dimension.
* 4. Iterate over the thread's elements, updating the per-dimension indexes along the way.
* (A standalone sketch of step 3 follows after this macro.)
*/ \
int TH_TENSOR_APPLY_hasFinished = 0; \
int64_t TH_TENSOR_dim_index = 0; \
/*step 1*/ \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, -1, 1) \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, -1, 1) \
if (0 == TH_TENSOR_APPLY_hasFinished) { \
auto TENSOR1##_i_local = TENSOR1##_i; \
auto TENSOR2##_i_local = TENSOR2##_i; \
auto TENSOR1##_data_local = TENSOR1##_data; \
auto TENSOR2##_data_local = TENSOR2##_data; \
at::parallel_for(0, SIZE, THRESHOLD, [&](int64_t begin, int64_t end) { \
auto TENSOR1##_i = TENSOR1##_i_local; \
auto TENSOR2##_i = TENSOR2##_i_local; \
auto TENSOR1##_data = TENSOR1##_data_local; \
auto TENSOR2##_data = TENSOR2##_data_local; \
/*step 2*/ \
ptrdiff_t line_index_start = begin; \
ptrdiff_t line_seg_length = (end - begin); \
/* step 3*/ \
__TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR2); \
__TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR1); \
TENSOR2##_data += TENSOR2##_memory_offset; \
TENSOR1##_data += TENSOR1##_memory_offset; \
ptrdiff_t count = 0; \
ptrdiff_t TENSOR2##_start = TENSOR2##_counter_tmp[TENSOR2##_dim-1]; \
ptrdiff_t TENSOR1##_start = TENSOR1##_counter_tmp[TENSOR1##_dim-1]; \
/* step 4*/ \
while (count < line_seg_length) { \
for (TENSOR2##_i=TENSOR2##_start, TENSOR1##_i = TENSOR1##_start; ((count < line_seg_length) && (TENSOR2##_i < TENSOR2##_size) && (TENSOR1##_i < TENSOR1##_size)); ++TENSOR2##_i, ++TENSOR1##_i, ++count) { \
CODE \
TENSOR2##_data += TENSOR2##_stride; \
TENSOR1##_data += TENSOR1##_stride; \
} \
if (count < line_seg_length) { \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR2); \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR1); \
} \
} \
if (TENSOR1##_counter_tmp != NULL) { \
THFree(TENSOR1##_counter_tmp); \
} \
if (TENSOR2##_counter_tmp != NULL) { \
THFree(TENSOR2##_counter_tmp); \
} \
}); \
} \
if (TENSOR2##_counter != NULL) { \
THFree(TENSOR2##_counter); \
} \
if (TENSOR1##_counter != NULL) { \
THFree(TENSOR1##_counter); \
} \
} \
}
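/* A minimal standalone sketch (not part of the TH sources) of step 3 above:
   convert the linear index of the first element a thread owns into a memory
   offset plus per-dimension counters, in the spirit of
   __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET. linear_to_offset is an illustrative name. */
#include <cstdint>
#include <vector>

static int64_t linear_to_offset(int64_t linear_index,
                                const std::vector<int64_t>& sizes,
                                const std::vector<int64_t>& strides,
                                std::vector<int64_t>& counters) {
  counters.assign(sizes.size(), 0);
  int64_t offset = 0;
  // Peel off the index in each dimension, innermost first.
  for (int64_t d = static_cast<int64_t>(sizes.size()) - 1; d >= 0; --d) {
    counters[d] = linear_index % sizes[d];
    offset += counters[d] * strides[d];
    linear_index /= sizes[d];
  }
  return offset;
}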
#define TH_TENSOR_APPLY3_PARALLEL(SIZE, CONTIG1, CONTIG2, CONTIG3, TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE, THRESHOLD) \
{ \
/* for advanced searching index */ \
if (CONTIG1 && CONTIG2 && CONTIG3) { \
TYPE1 *rp = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+TENSOR1->storage_offset(); \
TYPE2 *tp = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+TENSOR2->storage_offset(); \
TYPE3 *srcp = THTensor_getStoragePtr(TENSOR3)->data<TYPE3>()+TENSOR3->storage_offset(); \
if (tp != (TYPE2*)rp) { \
at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
PRAGMA_IVDEP \
for (auto iter = begin; iter < end; iter++) { \
TYPE1 *TENSOR1##_data = rp+iter; \
TYPE2 *TENSOR2##_data = tp+iter; \
TYPE3 *TENSOR3##_data = srcp+iter; \
CODE \
} \
}); \
} else { \
at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
PRAGMA_SIMD \
for (auto iter = begin; iter < end; iter++) { \
TYPE1 *TENSOR1##_data = rp+iter; \
TYPE2 *TENSOR2##_data = tp+iter; \
TYPE3 *TENSOR3##_data = srcp+iter; \
CODE \
} \
}); \
} \
} else { \
int TH_TENSOR_APPLY_hasFinished = 0; \
int64_t TH_TENSOR_dim_index = 0; \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, -1, 1) \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, -1, 1) \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, -1, 1) \
if (0 == TH_TENSOR_APPLY_hasFinished) { \
auto TENSOR1##_i_local = TENSOR1##_i; \
auto TENSOR2##_i_local = TENSOR2##_i; \
auto TENSOR3##_i_local = TENSOR3##_i; \
auto TENSOR1##_data_local = TENSOR1##_data; \
auto TENSOR2##_data_local = TENSOR2##_data; \
auto TENSOR3##_data_local = TENSOR3##_data; \
at::parallel_for(0, SIZE, THRESHOLD, [&](int64_t begin, int64_t end) { \
auto TENSOR1##_i = TENSOR1##_i_local; \
auto TENSOR2##_i = TENSOR2##_i_local; \
auto TENSOR3##_i = TENSOR3##_i_local; \
auto TENSOR1##_data = TENSOR1##_data_local; \
auto TENSOR2##_data = TENSOR2##_data_local; \
auto TENSOR3##_data = TENSOR3##_data_local; \
ptrdiff_t line_index_start = begin; \
ptrdiff_t line_seg_length = (end - begin); \
__TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR1); \
__TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR2); \
__TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR3); \
TENSOR1##_data += TENSOR1##_memory_offset; \
TENSOR2##_data += TENSOR2##_memory_offset; \
TENSOR3##_data += TENSOR3##_memory_offset; \
ptrdiff_t count = 0; \
ptrdiff_t TENSOR1##_start = TENSOR1##_counter_tmp[TENSOR1##_dim - 1]; \
ptrdiff_t TENSOR2##_start = TENSOR2##_counter_tmp[TENSOR2##_dim - 1]; \
ptrdiff_t TENSOR3##_start = TENSOR3##_counter_tmp[TENSOR3##_dim - 1]; \
while (count < line_seg_length) { \
for (TENSOR1##_i=TENSOR1##_start, TENSOR2##_i=TENSOR2##_start,TENSOR3##_i=TENSOR3##_start; (count<line_seg_length)&&(TENSOR1##_i<TENSOR1##_size)&&(TENSOR2##_i<TENSOR2##_size)&&(TENSOR3##_i<TENSOR3##_size); ++TENSOR1##_i,++TENSOR2##_i,++TENSOR3##_i,++count) { \
CODE \
TENSOR1##_data += TENSOR1##_stride; \
TENSOR2##_data += TENSOR2##_stride; \
TENSOR3##_data += TENSOR3##_stride; \
} \
if (count < line_seg_length) { \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR1); \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR2); \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR3); \
} \
} \
if (TENSOR1##_counter_tmp != NULL) { \
THFree(TENSOR1##_counter_tmp); \
} \
if (TENSOR2##_counter_tmp != NULL) { \
THFree(TENSOR2##_counter_tmp); \
} \
if (TENSOR3##_counter_tmp != NULL) { \
THFree(TENSOR3##_counter_tmp); \
} \
}); \
} \
if (TENSOR1##_counter != NULL) { \
THFree(TENSOR1##_counter); \
} \
if (TENSOR2##_counter != NULL) { \
THFree(TENSOR2##_counter); \
} \
if (TENSOR3##_counter != NULL) { \
THFree(TENSOR3##_counter); \
} \
} \
}
#define TH_TENSOR_APPLY_REDUCTION_SUM_PARALLEL(TYPE, TENSOR, EXPR, OUTPUT, THRESHOLD) \
{ \
int TENSOR##Contig = THTensor_(isContiguous)(TENSOR); \
ptrdiff_t TENSOR##Size = THTensor_(nElement)(TENSOR); \
if (TENSOR##Contig) { \
TYPE *rp = THTensor_getStoragePtr(TENSOR)->data<TYPE>()+TENSOR->storage_offset(); \
OUTPUT = at::parallel_reduce(0, TENSOR##Size, (THRESHOLD * 10), (accreal)0, [&](int64_t begin, int64_t end, accreal ident)->accreal { \
accreal r = ident; \
for (auto iter = begin; iter < end; iter++) { \
TYPE *TENSOR##_data = rp+iter; \
r += (EXPR); \
} \
return r; \
}, std::plus<accreal>()); \
} else { \
int TH_TENSOR_APPLY_hasFinished = 0; \
int64_t TH_TENSOR_dim_index = 0; \
__TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, -1, 1); \
if (0 == TH_TENSOR_APPLY_hasFinished) { \
auto TENSOR##_data_local = TENSOR##_data; \
auto TENSOR##_i_local = TENSOR##_i; \
OUTPUT = at::parallel_reduce(0, TENSOR##Size, THRESHOLD, (accreal)0, [&](int64_t begin, int64_t end, accreal ident)->accreal { \
auto TENSOR##_data = TENSOR##_data_local; \
auto TENSOR##_i = TENSOR##_i_local; \
ptrdiff_t line_index_start = begin; \
ptrdiff_t line_seg_length = (end - begin); \
__TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR); \
TENSOR##_data += TENSOR##_memory_offset; \
ptrdiff_t count = 0; \
ptrdiff_t TENSOR##_start = TENSOR##_counter_tmp[TENSOR##_dim - 1]; \
accreal r = ident; \
while (count < line_seg_length) { \
for (TENSOR##_i=TENSOR##_start; (count < line_seg_length)&&(TENSOR##_i < TENSOR##_size); ++TENSOR##_i, ++count) { \
r += (EXPR); \
TENSOR##_data += TENSOR##_stride; \
} \
if (count < line_seg_length) { \
__TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR); \
} \
} \
if (TENSOR##_counter_tmp != NULL) { \
THFree(TENSOR##_counter_tmp); \
} \
return r; \
}, std::plus<accreal>()); \
} \
if (TENSOR##_counter != NULL) { \
THFree(TENSOR##_counter); \
} \
} \
}
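/* A minimal standalone sketch (not part of the TH sources) of the contiguous branch
   above: summing a flat buffer with at::parallel_reduce, where each worker reduces
   its own [begin, end) slice and the partial sums are combined with std::plus.
   sum_contiguous is an illustrative name, not a TH/ATen API. */
#include <ATen/Parallel.h>
#include <cstdint>
#include <functional>

static double sum_contiguous(const float* data, int64_t n, int64_t grain_size) {
  return at::parallel_reduce(
      0, n, grain_size, /*ident=*/0.0,
      [data](int64_t begin, int64_t end, double partial) -> double {
        for (int64_t i = begin; i < end; ++i) {
          partial += data[i];  // per-thread partial sum over the assigned slice
        }
        return partial;
      },
      std::plus<double>());
}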
#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \
{ \
auto code_fn = [&](int64_t begin, int64_t end) { \
ptrdiff_t TENSOR##_len = end - begin; \
TYPE *TENSOR##_data = TENSOR->data<scalar_t>() + begin; \
CODE \
}; \
int in_parallel = at::in_parallel_region(); \
ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR); \
if (!in_parallel) { \
at::parallel_for(0, TH_TENSOR_size, TH_OMP_OVERHEAD_THRESHOLD, code_fn); \
} else { \
code_fn(0, TH_TENSOR_size); \
} \
}
#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
{ \
auto code_fn = [&](int64_t begin, int64_t end) { \
ptrdiff_t TENSOR1##_len = end - begin; \
TYPE1 *TENSOR1##_data = TENSOR1->data<scalar_t>() + begin; \
TYPE2 *TENSOR2##_data = TENSOR2->data<scalar_t>() + begin; \
CODE \
}; \
int in_parallel = at::in_parallel_region(); \
ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \
if (!in_parallel) { \
at::parallel_for(0, TH_TENSOR_size, TH_OMP_OVERHEAD_THRESHOLD, code_fn); \
} else { \
code_fn(0, TH_TENSOR_size); \
} \
}
#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
{ \
auto code_fn = [&](int64_t begin, int64_t end) { \
ptrdiff_t TENSOR1##_len = end - begin; \
TYPE1 *TENSOR1##_data = TENSOR1->data<scalar_t>() + begin; \
TYPE2 *TENSOR2##_data = TENSOR2->data<scalar_t>() + begin; \
TYPE3 *TENSOR3##_data = TENSOR3->data<scalar_t>() + begin; \
CODE \
}; \
int in_parallel = at::in_parallel_region(); \
ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \
if (!in_parallel) { \
at::parallel_for(0, TH_TENSOR_size, TH_OMP_OVERHEAD_THRESHOLD, code_fn); \
} else { \
code_fn(0, TH_TENSOR_size); \
} \
}
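/* A minimal standalone sketch (not part of the TH sources) of the dispatch the
   CONTIG macros above perform: parallelize over a contiguous buffer with
   at::parallel_for unless we are already inside a parallel region, in which case
   run the body serially to avoid nested parallelism. apply_contig is an
   illustrative name, not a TH API. */
#include <ATen/Parallel.h>
#include <cstdint>

template <typename scalar_t, typename Op>
static void apply_contig(scalar_t* data, int64_t n, int64_t grain_size, Op op) {
  auto body = [&](int64_t begin, int64_t end) {
    for (int64_t i = begin; i < end; ++i) {
      data[i] = op(data[i]);  // element-wise update of the contiguous slice
    }
  };
  if (!at::in_parallel_region()) {
    at::parallel_for(0, n, grain_size, body);
  } else {
    body(0, n);  // already inside a parallel region: process the whole range here
  }
}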

View File

@ -1,49 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorFastGetSet.hpp"
#else
static inline scalar_t THTensor_(fastGetLegacy1dNoScalars)(THTensor *self, int64_t x0) {
return self->unsafe_data<scalar_t>()[x0*THTensor_strideLegacyNoScalars(self, 0)];
}
static inline scalar_t THTensor_(fastGet1d)(THTensor *self, int64_t x0) {
return self->unsafe_data<scalar_t>()[x0*self->stride(0)];
}
static inline scalar_t THTensor_(fastGet2d)(THTensor *self, int64_t x0, int64_t x1) {
return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)];
}
static inline scalar_t THTensor_(fastGet3d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2) {
return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)];
}
static inline scalar_t THTensor_(fastGet4d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3) {
return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)];
}
static inline scalar_t THTensor_(fastGet5d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, int64_t x4) {
return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)+(x4)*self->stride(4)];
}
static inline void THTensor_(fastSet1d)(THTensor *self, int64_t x0, scalar_t value) {
self->unsafe_data<scalar_t>()[x0*self->stride(0)] = value;
}
static inline void THTensor_(fastSet2d)(THTensor *self, int64_t x0, int64_t x1, scalar_t value) {
self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)] = value;
}
static inline void THTensor_(fastSet3d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, scalar_t value) {
self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)] = value;
}
static inline void THTensor_(fastSet4d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value) {
self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)] = value;
}
static inline void THTensor_(fastSet5d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, int64_t x4, scalar_t value) {
self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)+(x4)*self->stride(4)] = value;
}
#endif

View File

@ -1,32 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorMath.h"
#else
#include <ATen/core/Generator.h>
TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb);
#if !defined(TH_REAL_IS_HALF)
TH_API ptrdiff_t THTensor_(numel)(THTensor *t);
#if !defined(TH_REAL_IS_BFLOAT16)
void THTensor_(preserveReduceDimSemantics)(THTensor *r_, int in_dims, int reduce_dimension, int keepdim);
TH_API void THTensor_(take)(THTensor *tensor, THTensor *src, THLongTensor *index);
TH_API void THTensor_(put)(THTensor *tensor, THLongTensor *index, THTensor *src, int accumulate);
#if !defined(TH_REAL_IS_BOOL) /* non bool only part */
TH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, int64_t k, int dimension, int keepdim);
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
TH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue);
#endif
#endif
#endif
#endif /* !defined(TH_REAL_IS_HALF) */
#endif /* TH_GENERIC_FILE*/

View File

@ -1,292 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorMoreMath.cpp"
#else
#include <TH/generic/THTensorApply.hpp>
#include <ATen/CPUGeneratorImpl.h>
#include <ATen/Utils.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/WrapDimUtils.h>
#include <limits>
ptrdiff_t THTensor_(numel)(THTensor *t)
{
return THTensor_(nElement)(t);
}
#if !defined(TH_REAL_IS_BFLOAT16) && !defined(TH_REAL_IS_HALF)
// Helper function to be used in a reduction operation.
// Due to resize semantics of outputs, if the specified output tensor r_ has
// the same size as the output of the reduction operation, then any noncontiguities
// in r_ should be preserved.
// The reduction operation, however, needs to act on r_ with an extra dimension
// (the reduced dimension), so this function "resizes" r_ and preserves its
// noncontiguities if necessary.
void THTensor_(preserveReduceDimSemantics)(
THTensor *r_, int in_dims, int reduce_dimension, int keepdim) {
if (r_ && !keepdim &&
THTensor_(nDimensionLegacyAll)(r_) == in_dims - 1 &&
THTensor_(nDimensionLegacyAll)(r_) != 0) {
THTensor_(unsqueeze1d)(r_, r_, reduce_dimension);
}
}
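/* A minimal standalone sketch (not part of the TH sources) of the shape rule the
   helper above preserves: reducing dimension dim either keeps it as size 1
   (keepdim) or drops it entirely. reduced_shape is an illustrative name. */
#include <cstddef>
#include <cstdint>
#include <vector>

static std::vector<int64_t> reduced_shape(std::vector<int64_t> shape,
                                          size_t dim, bool keepdim) {
  if (keepdim) {
    shape[dim] = 1;                                            // e.g. {4, 5, 6}, dim = 1 -> {4, 1, 6}
  } else {
    shape.erase(shape.begin() + static_cast<std::ptrdiff_t>(dim));  // e.g. {4, 5, 6}, dim = 1 -> {4, 6}
  }
  return shape;
}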
#if !defined(TH_REAL_IS_BOOL) /* non bool only part */
#define ARR(III) arr[(III)*stride]
#define IDX(III) idx[(III)*stride]
#define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap
#define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap
#define ARR_SWAP(III, JJJ) \
REAL_SWAP(ARR(III), ARR(JJJ));
#define BOTH_SWAP(III, JJJ) \
REAL_SWAP(ARR(III), ARR(JJJ)); \
LONG_SWAP(IDX(III), IDX(JJJ))
/* Emulate NumPy behavior of putting NaNs
* at the end of an ascending list. */
#define GT_OR_NAN(x, y) \
((th_isnan(x) && !(th_isnan(y))) || (x > y))
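/* A minimal standalone sketch (not part of the TH sources) of the ordering
   GT_OR_NAN encodes: x ranks after y if x is NaN (and y is not) or if x > y,
   which is what pushes NaNs to the end of an ascending list. nan_last_greater
   is an illustrative name. */
#include <cmath>

static inline bool nan_last_greater(double x, double y) {
  return (std::isnan(x) && !std::isnan(y)) || (x > y);
}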
/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's
public domain implementation at http://ndevilla.free.fr/median/median/
Adapted similarly to the above Quicksort algorithm. */
static void THTensor_(quickselect)(scalar_t *arr, int64_t *idx, int64_t k, int64_t elements, int64_t stride)
{
int64_t P, L, R, i, j, swap;
scalar_t rswap, piv;
L = 0;
R = elements-1;
do {
if (R <= L) /* One element only */
return;
if (R == L+1) { /* Two elements only */
if (ARR(L) > ARR(R)) {
BOTH_SWAP(L, R);
}
return;
}
/* Use median of three for pivot choice */
P=(L+R)>>1;
BOTH_SWAP(P, L+1);
if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }
if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }
if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }
i = L+1;
j = R;
piv = ARR(L);
do {
do i++; while(ARR(i) < piv);
do j--; while(ARR(j) > piv);
if (j < i)
break;
BOTH_SWAP(i, j);
} while(1);
BOTH_SWAP(L, j);
/* Re-set active partition */
if (j <= k) L=i;
if (j >= k) R=j-1;
} while(1);
}
#undef ARR
#undef IDX
#undef LONG_SWAP
#undef REAL_SWAP
#undef BOTH_SWAP
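/* A minimal standalone sketch (not part of the TH sources) of the selection problem
   quickselect solves above: place the k-th smallest element (0-based) at position k.
   Unlike the TH version it does not carry an index array along and does not
   reproduce the NaN-last ordering of GT_OR_NAN; std::nth_element is used purely
   as an illustration. kth_smallest is an illustrative name. */
#include <algorithm>
#include <cstddef>
#include <vector>

static float kth_smallest(std::vector<float> values, std::size_t k) {
  std::nth_element(values.begin(), values.begin() + static_cast<std::ptrdiff_t>(k), values.end());
  return values[k];
}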
void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, int64_t k, int dimension, int keepdim)
{
THTensor *temp_;
THLongTensor *tempi_;
scalar_t *temp__data;
int64_t *tempi__data;
int64_t t_size_dim;
THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "dimension out of range");
THArgCheck(k > 0 && k <= THTensor_sizeLegacyNoScalars(t, dimension), 2, "selected index out of range");
int in_dims = THTensor_(nDimensionLegacyAll)(t);
THTensor_(preserveReduceDimSemantics)(values_, in_dims, dimension, keepdim);
THLongTensor_preserveReduceDimSemantics(indices_, in_dims, dimension, keepdim);
std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
dim[dimension] = 1;
THTensor_(resize)(values_, dim, {});
THLongTensor_resize(indices_, dim, {});
t_size_dim = THTensor_sizeLegacyNoScalars(t, dimension);
temp_ = THTensor_(new)();
THTensor_(resize1d)(temp_, t_size_dim);
temp__data = temp_->data<scalar_t>();
tempi_ = THLongTensor_new();
THLongTensor_resize1d(tempi_, t_size_dim);
tempi__data = THLongTensor_data(tempi_);
TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, values_, int64_t, indices_, dimension,
TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
int64_t i;
for(i = 0; i < t_size_dim; i++)
temp__data[i] = t_data[i*t_stride];
for(i = 0; i < t_size_dim; i++)
tempi__data[i] = i;
THTensor_(quickselect)(temp__data, tempi__data, k - 1, t_size_dim, 1);
*values__data = temp__data[k-1];
*indices__data = tempi__data[k-1];);
c10::raw::intrusive_ptr::decref(temp_);
THLongTensor_free(tempi_);
if (!keepdim) {
THTensor_(squeeze1d)(values_, values_, dimension);
THLongTensor_squeeze1d(indices_, indices_, dimension);
}
}
static void THTensor_(propagate_names_if_named_tensor_enabled)(THTensor* result, THTensor* src) {
at::namedinference::propagate_names(result, src);
}
#define LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, THRESHOLD) \
void THTensor_(NAME)(THTensor *r_, THTensor *t) \
{ \
THTensor_(resizeAs)(r_, t); \
ptrdiff_t r_Size = THTensor_(nElement)(r_); \
int r_Contig = THTensor_(isContiguous)(r_); \
int tContig = THTensor_(isContiguous)(t); \
TH_TENSOR_APPLY2_PARALLEL(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);, THRESHOLD); \
THTensor_(propagate_names_if_named_tensor_enabled)(r_, t); \
}
#define LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS(NAME, CFUNC) \
LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)
#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, THRESHOLD) \
void THTensor_(NAME)(THTensor *r_, THTensor *t) \
{ \
THTensor_(resizeAs)(r_, t); \
ptrdiff_t r_Size = THTensor_(nElement)(r_); \
int r_Contig = THTensor_(isContiguous)(r_); \
int tContig = THTensor_(isContiguous)(t); \
if (r_Contig && tContig) { \
TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(NAME)(r__data, t_data, r__len);); \
} else { \
TH_TENSOR_APPLY2_PARALLEL(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);, THRESHOLD); \
} \
THTensor_(propagate_names_if_named_tensor_enabled)(r_, t); \
}
#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS(NAME, CFUNC) \
LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)
#define EXPAND(...) __VA_ARGS__
#define GET_4TH_ARG(ARG0, ARG1, ARG2, ARG3, ...) ARG3
#define LAB_IMPLEMENT_BASIC_FUNCTION_CHOOSE(...) \
EXPAND(GET_4TH_ARG(__VA_ARGS__, LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS, LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS, ))
#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_CHOOSE(...) \
EXPAND(GET_4TH_ARG(__VA_ARGS__, LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS, LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS, ))
#define LAB_IMPLEMENT_BASIC_FUNCTION(...) EXPAND(LAB_IMPLEMENT_BASIC_FUNCTION_CHOOSE(__VA_ARGS__)(__VA_ARGS__))
#define LAB_IMPLEMENT_VECTORIZED_FUNCTION(...) EXPAND(LAB_IMPLEMENT_VECTORIZED_FUNCTION_CHOOSE(__VA_ARGS__)(__VA_ARGS__))
/*
* LAB_IMPLEMENT_BASIC_FUNCTION is a macro with an optional threshold parameter, so it can be used flexibly.
* The macro discards an invalid threshold when parallelization is unavailable,
* and supplies a default threshold when none is passed.
* In other words:
* (A) If parallelization is unavailable, both usages below are valid:
*     (1) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity, OMP_OVERHEAD_THRESHOLD) // the invalid threshold is discarded
*     (2) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity)
* (B) If parallelization is available, both usages below are also valid:
*     (1) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity, OMP_OVERHEAD_THRESHOLD)
*     (2) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity) // the default threshold is used
* The same applies to LAB_IMPLEMENT_VECTORIZED_FUNCTION.
* (A standalone sketch of the underlying argument-counting trick follows below.)
*/
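/* A minimal standalone sketch (not part of the TH sources) of the argument-counting
   trick used by EXPAND/GET_4TH_ARG above: appending the candidate implementations
   after the caller's arguments makes the 4th argument select the 3-argument form
   when a threshold is passed and the 2-argument form otherwise. All MY_* names
   are illustrative. */
#define MY_EXPAND(...) __VA_ARGS__
#define MY_GET_4TH_ARG(A0, A1, A2, A3, ...) A3
#define MY_IMPL_3(NAME, VALUE, THRESHOLD) \
  static const int NAME = (VALUE) + (THRESHOLD);
#define MY_IMPL_2(NAME, VALUE) MY_IMPL_3(NAME, VALUE, 0)
#define MY_DEFINE(...) \
  MY_EXPAND(MY_GET_4TH_ARG(__VA_ARGS__, MY_IMPL_3, MY_IMPL_2, )(__VA_ARGS__))

MY_DEFINE(two_args, 1)       /* expands to MY_IMPL_3(two_args, 1, 0) via MY_IMPL_2 */
MY_DEFINE(three_args, 1, 10) /* expands to MY_IMPL_3(three_args, 1, 10) */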
LAB_IMPLEMENT_BASIC_FUNCTION(neg,-)
#if defined(TH_REAL_IS_LONG)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,std::abs)
#endif /* int64_t only part */
#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_CHAR)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)
#endif /* int only part */
#if defined(TH_REAL_IS_BYTE)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,)
#endif /* for byte, identity due to it being unsigned */
/* floating point only now */
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#if defined (TH_REAL_IS_FLOAT)
#define TH_MATH_NAME(fn) fn##f
#else
#define TH_MATH_NAME(fn) fn
#endif
LAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs))
LAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
LAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue)
{
if (nbins <= 0) {
THError("bins must be > 0");
}
scalar_t minval;
scalar_t maxval;
scalar_t *h_data;
THTensor_(resize1d)(hist, nbins);
THTensor_wrap(hist).zero_();
minval = minvalue;
maxval = maxvalue;
if (minval == maxval)
{
minval = THTensor_wrap(tensor).min().item<scalar_t>();
maxval = THTensor_wrap(tensor).max().item<scalar_t>();
}
if (minval == maxval)
{
minval = minval - 1;
maxval = maxval + 1;
}
TORCH_CHECK(!(std::isinf(minval) || std::isinf(maxval) || std::isnan(minval) || std::isnan(maxval)), "range of [", minval, ", ", maxval, "] is not finite");
TORCH_CHECK(minval < maxval, "max must be larger than min");
h_data = hist->data<scalar_t>();
TH_TENSOR_APPLY(scalar_t, tensor,
if (*tensor_data >= minval && *tensor_data <= maxval) {
const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins);
h_data[THMin(bin, nbins-1)] += 1;
}
);
}
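/* A minimal standalone sketch (not part of the TH sources) of the binning rule
   histc applies above: a value inside [minval, maxval] is mapped linearly onto
   nbins buckets, with the top edge clamped into the last bin as THMin(bin, nbins-1)
   does. histogram_bin is an illustrative name. */
#include <algorithm>
#include <cstdint>

static inline int64_t histogram_bin(double v, double minval, double maxval, int64_t nbins) {
  int64_t bin = static_cast<int64_t>((v - minval) / (maxval - minval) * nbins);
  return std::min(bin, nbins - 1);
}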
#endif
#undef TH_MATH_NAME
#endif /* floating point only part */
#undef IS_NONZERO
#endif /* !defined(TH_REAL_IS_BOOL) */
#endif /* TH_GENERIC_FILE */

View File

@ -1,18 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THVector.h"
#else
#if !defined(TH_REAL_IS_BOOL) /* non bool only part */
TH_API void THVector_(neg)(scalar_t *y, const scalar_t *x, const ptrdiff_t n);
#endif /* non bool only part */
/* floating point only now */
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
TH_API void THVector_(erfc)(scalar_t *y, const scalar_t *x, const ptrdiff_t n);
TH_API void THVector_(pow)(scalar_t *y, const scalar_t *x, const scalar_t c, const ptrdiff_t n);
#endif /* floating point only part */
#endif

View File

@ -1078,11 +1078,9 @@ aten_native_source_non_codegen_list = [
"aten/src/ATen/native/sparse/SparseCsrTensor.cpp",
"aten/src/ATen/native/sparse/SparseTensorMath.cpp",
"aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp",
"aten/src/TH/THBlas.cpp",
"aten/src/TH/THGeneral.cpp",
"aten/src/TH/THStorageFunctions.cpp",
"aten/src/TH/THTensor.cpp",
"aten/src/TH/THTensorMoreMath.cpp",
"aten/src/ATen/native/utils/Factory.cpp",
"aten/src/ATen/native/xnnpack/Activation.cpp",
"aten/src/ATen/native/xnnpack/ChannelShuffle.cpp",