Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)

TH: Clean up dead code (#60655)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/60655
Test Plan: Imported from OSS
Reviewed By: albanD
Differential Revision: D29371717
Pulled By: ngimel
fbshipit-source-id: faa71b1d4a15450c78e12aa917daec853057bce9

parent 4a7d281119
commit 42c8439b6e
@@ -332,11 +332,9 @@ filegroup(
filegroup(
    name = "th_srcs",
    srcs = [
        "aten/src/TH/THBlas.cpp",
        "aten/src/TH/THGeneral.cpp",
        "aten/src/TH/THStorageFunctions.cpp",
        "aten/src/TH/THTensor.cpp",
        "aten/src/TH/THTensorMoreMath.cpp",
    ],
)
@@ -546,10 +544,6 @@ header_template_rule(
    src = "aten/src/TH/THGeneral.h.in",
    out = "aten/src/TH/THGeneral.h",
    substitutions = {
        "#cmakedefine USE_BLAS": "#define USE_BLAS",
        "#cmakedefine USE_LAPACK": "#define USE_LAPACK",
        "#cmakedefine BLAS_F2C": "/* #undef BLAS_F2C */",
        "#cmakedefine BLAS_USE_CBLAS_DOT": "#define BLAS_USE_CBLAS_DOT",
    },
)
@@ -1,15 +1,12 @@
set(Aten_TH_AVX_extra_src)

set(hdr
  THGeneral.h THHalf.h THStorage.h THStorageFunctions.h THTensor.h THTensorApply.h THBlas.h
  THVector.h )
  THGeneral.h THHalf.h THStorage.h THStorageFunctions.h THTensor.h)

set(ATen_TH_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/THGeneral.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/THTensor.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/THTensorMoreMath.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/THBlas.cpp
)
# Remember that PARENT_SCOPE variables are not in the current scope
set(ATen_TH_SRCS ${ATen_TH_SRCS} PARENT_SCOPE)
@@ -36,7 +33,6 @@ configure_file(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h")

install(FILES
  TH.h
  THBlas.h
  ${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h
  THGenerateAllTypes.h
  THGenerateBFloat16Type.h
@@ -62,17 +58,12 @@ install(FILES
  THStorage.h
  THStorageFunctions.h
  THTensor.h
  THTensorApply.h
  THTensorDimApply.h
  THVector.h
  THHalf.h
  THTensor.hpp
  THStorageFunctions.hpp
  DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/TH")

install(FILES
  generic/THBlas.cpp
  generic/THBlas.h
  generic/THStorage.cpp
  generic/THStorage.h
  generic/THStorageCopy.cpp
@@ -80,8 +71,5 @@ install(FILES
  generic/THTensor.cpp
  generic/THTensor.h
  generic/THTensor.hpp
  generic/THTensorMath.h
  generic/THVector.h
  # See Note [TH abstraction violation]
  generic/THTensorFastGetSet.hpp
  DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/TH/generic")
@@ -3,11 +3,7 @@

#include <TH/THGeneral.h>

#include <TH/THBlas.h>
#include <TH/THVector.h>
#include <TH/THStorageFunctions.h>
#include <TH/THTensor.h>
#include <TH/THTensorApply.h>
#include <TH/THTensorDimApply.h>

#endif
@@ -1,13 +0,0 @@
#include <TH/THBlas.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THBlas.cpp>
#include <TH/THGenerateAllTypes.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THBlas.cpp>
#include <TH/THGenerateBFloat16Type.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THBlas.cpp>
#include <TH/THGenerateHalfType.h>
@@ -1,17 +0,0 @@
#ifndef TH_BLAS_INC
#define TH_BLAS_INC

#include <TH/THGeneral.h>

#define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME)

#include <TH/generic/THBlas.h>
#include <TH/THGenerateAllTypes.h>

#include <TH/generic/THBlas.h>
#include <TH/THGenerateBFloat16Type.h>

#include <TH/generic/THBlas.h>
#include <TH/THGenerateHalfType.h>

#endif
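Aside (not part of the diff): THBlas_(NAME) above relies on TH_CONCAT_4 token pasting, so each re-inclusion of the generic header under a different Real yields a distinct symbol such as THFloatBlas_swap. A minimal, self-contained C++ sketch of that naming scheme (standard library only, not PyTorch code):

// Sketch of the TH_CONCAT_4-style token pasting behind THBlas_(NAME).
#include <cstdio>

#define TH_CONCAT_4_EXPAND(a, b, c, d) a##b##c##d
#define TH_CONCAT_4(a, b, c, d) TH_CONCAT_4_EXPAND(a, b, c, d)
#define THBlas_(NAME) TH_CONCAT_4(TH, Real, Blas_, NAME)

// With Real defined as Float, THBlas_(swap) expands to THFloatBlas_swap.
#define Real Float
void THBlas_(swap)(int n, float* x, float* y) {
  for (int i = 0; i < n; i++) { float t = x[i]; x[i] = y[i]; y[i] = t; }
}
#undef Real

int main() {
  float a[2] = {1.0f, 2.0f}, b[2] = {3.0f, 4.0f};
  THFloatBlas_swap(2, a, b);           // call the generated name directly
  std::printf("%g %g\n", a[0], a[1]);  // prints: 3 4
}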
@@ -68,21 +68,6 @@ void _THAssertionFailed(const char *file, const int line, const char *exp, const
  _THError(file, line, "Assertion `%s' failed. %s", exp, msg);
}

void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data)
{
  threadErrorHandler = new_handler;
  threadErrorHandlerData = data;
}

void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data)
{
  if (new_handler)
    defaultErrorHandler = new_handler;
  else
    defaultErrorHandler = defaultErrorHandlerFunction;
  defaultErrorHandlerData = data;
}

/* Torch Arg Checking Handling */
static void defaultArgErrorHandlerFunction(int argNumber, const char *msg, void *data)
{
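Aside (not part of the diff): THSetErrorHandler installs a handler for the calling thread, with THSetDefaultErrorHandler as the process-wide fallback. A hedged sketch of how a frontend could have used it, written against the THErrorHandlerFunction signature shown elsewhere in this diff; illustrative only, since these handler entry points are part of the dead code this commit targets:

// Illustrative only: routing TH errors into C++ exceptions via the old API.
#include <TH/THGeneral.h>   // declares THSetErrorHandler in the pre-cleanup tree
#include <stdexcept>
#include <string>

static void throwingHandler(const char* msg, void* /*data*/) {
  // Replace TH's default error handling with a C++ exception.
  throw std::runtime_error(std::string("TH error: ") + msg);
}

void installThrowingHandler() {
  // Affects the calling thread only; nullptr is the user-data pointer.
  THSetErrorHandler(throwingHandler, nullptr);
}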
@@ -125,42 +110,6 @@ void _THArgCheck(const char *file, int line, int condition, int argNumber, const
  }
}

void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)
{
  threadArgErrorHandler = new_handler;
  threadArgErrorHandlerData = data;
}

void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)
{
  if (new_handler)
    defaultArgErrorHandler = new_handler;
  else
    defaultArgErrorHandler = defaultArgErrorHandlerFunction;
  defaultArgErrorHandlerData = data;
}

// NOLINTNEXTLINE(modernize-use-nullptr,cppcoreguidelines-avoid-non-const-global-variables)
static __thread void (*torchGCFunction)(void *data) = NULL;
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
static __thread void *torchGCData;

/* Optional hook for integrating with a garbage-collected frontend.
 *
 * If torch is running with a garbage-collected frontend (e.g. Lua),
 * the GC isn't aware of TH-allocated memory so may not know when it
 * needs to run. These hooks trigger the GC to run in two cases:
 *
 * (1) When a memory allocation (malloc, realloc, ...) fails
 * (2) When the total TH-allocated memory hits a dynamically-adjusted
 *     soft maximum.
 */
void THSetGCHandler( void (*torchGCFunction_)(void *data), void *data )
{
  torchGCFunction = torchGCFunction_;
  torchGCData = data;
}

void* THAlloc(ptrdiff_t size)
{
  if(size < 0)
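Aside (not part of the diff): the comment block above documents the GC hook protocol, and THRealloc below shows the retry path (run the hook once when realloc fails, then retry the allocation). A hedged sketch of registering such a hook through THSetGCHandler; the FrontendState type and its collect callback are hypothetical stand-ins for a garbage-collected frontend:

// Illustrative only: wiring a hypothetical GC'd frontend into the TH hook.
#include <TH/THGeneral.h>   // declares THSetGCHandler in the pre-cleanup tree

struct FrontendState {
  void (*collect)(FrontendState*);  // hypothetical frontend GC entry point
};

static void runFrontendGC(void* data) {
  auto* state = static_cast<FrontendState*>(data);
  state->collect(state);  // release frontend-held buffers so TH can retry realloc
}

void hookGCIntoTH(FrontendState* state) {
  THSetGCHandler(runFrontendGC, state);
}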
@@ -169,63 +118,7 @@ void* THAlloc(ptrdiff_t size)
  return c10::alloc_cpu(size);
}

void* THRealloc(void *ptr, ptrdiff_t size)
{
  if(!ptr)
    return(THAlloc(size));

  if(size == 0)
  {
    THFree(ptr);
    // NOLINTNEXTLINE(modernize-use-nullptr)
    return NULL;
  }

  if(size < 0)
    THError("$ Torch: invalid memory size -- maybe an overflow?");

  // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
  void *newptr = realloc(ptr, size);

  if(!newptr && torchGCFunction) {
    torchGCFunction(torchGCData);
    // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
    newptr = realloc(ptr, size);
  }

  if(!newptr)
    THError("$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824);

  return newptr;
}

void THFree(void *ptr)
{
  c10::free_cpu(ptr);
}

THDescBuff _THSizeDesc(const int64_t *size, const int64_t ndim) {
  const int L = TH_DESC_BUFF_LEN;
  THDescBuff buf;
  char *str = buf.str;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int64_t i;
  int64_t n = 0;
  n += snprintf(str, L-n, "[");

  for (i = 0; i < ndim; i++) {
    if (n >= L) break;
    n += snprintf(str+n, L-n, "%" PRId64, size[i]);
    if (i < ndim-1) {
      n += snprintf(str+n, L-n, " x ");
    }
  }

  if (n < L - 2) {
    snprintf(str+n, L-n, "]");
  } else {
    snprintf(str+L-5, 5, "...]");
  }

  return buf;
}
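Aside (not part of the diff): _THSizeDesc above renders a shape as a bracketed, "x"-separated string and truncates to "...]" when the 64-byte THDescBuff would overflow. An approximate, standalone restatement of that formatting (standard library only), useful for seeing what the helper produced:

// Approximate restatement of the _THSizeDesc formatting shown above.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

std::string sizeDesc(const std::vector<int64_t>& size) {
  const size_t kMaxLen = 64;  // mirrors TH_DESC_BUFF_LEN
  std::string s = "[";
  for (size_t i = 0; i < size.size(); i++) {
    s += std::to_string(size[i]);
    if (i + 1 < size.size()) s += " x ";
  }
  s += "]";
  if (s.size() > kMaxLen) {   // emulate the "...]" truncation
    s.resize(kMaxLen - 4);
    s += "...]";
  }
  return s;
}

int main() {
  std::printf("%s\n", sizeDesc({2, 3, 4}).c_str());  // prints: [2 x 3 x 4]
}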
@@ -21,11 +21,6 @@
#include <mkl_vsl.h>
#endif

#cmakedefine USE_BLAS
#cmakedefine USE_LAPACK
#cmakedefine BLAS_F2C
#cmakedefine BLAS_USE_CBLAS_DOT

# define TH_EXTERNC extern "C"

// Note(jiayq): copied from ATen/core/Macros.h. Because internal build of TH
@@ -72,26 +67,12 @@
typedef void (*THErrorHandlerFunction)(const char *msg, void *data);
typedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data);

#define TH_DESC_BUFF_LEN 64
typedef struct {
    char str[TH_DESC_BUFF_LEN];
} THDescBuff;


TH_API THDescBuff _THSizeDesc(const int64_t *size, const int64_t ndim);
TH_API TH_NO_RETURN void _THError(const char *file, const int line, const char *fmt, ...);
TH_API void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...);
TH_API void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data);
TH_API void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data);
TH_API void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...);
TH_API void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);
TH_API void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);
TH_API void* THAlloc(ptrdiff_t size);
TH_API void* THRealloc(void *ptr, ptrdiff_t size);
TH_API void THFree(void *ptr);
TH_API void THSetGCHandler( void (*torchGCHandlerFunction)(void *data), void *data );
// this hook should only be called by custom allocator functions
TH_API void THHeapUpdate(ptrdiff_t size);

#define THError(...) _THError(__FILE__, __LINE__, __VA_ARGS__)
|
|||
|
|
@@ -36,42 +36,3 @@ void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageO
|
|||
c10::raw::intrusive_ptr::incref(storage_);
|
||||
THTensor_wrap(self).set_(at::Storage(c10::intrusive_ptr<at::StorageImpl>::reclaim(storage_)), storageOffset_, size_, stride_);
|
||||
}
|
||||
|
||||
void THTensor_resize(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride)
|
||||
{
|
||||
if (stride.data()) {
|
||||
THArgCheck(stride.size() == size.size(), 3, "invalid stride");
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
THAssert(size.size() <= INT_MAX);
|
||||
#endif
|
||||
THTensor_resizeNd(self, size.size(), size.data(), stride.data());
|
||||
}
|
||||
|
||||
void THTensor_resizeNd(THTensor *self, int nDimension, const int64_t *size, const int64_t *stride)
|
||||
{
|
||||
TORCH_CHECK(nDimension >= 0, "resizeNd nDimension must be non-negative");
|
||||
at::IntArrayRef sizes(size, nDimension);
|
||||
at::optional<at::IntArrayRef> strides;
|
||||
if (stride) {
|
||||
strides = at::IntArrayRef(stride, nDimension);
|
||||
}
|
||||
at::native::resize_impl_cpu_(self, sizes, strides);
|
||||
}
|
||||
|
||||
// NB: Steals ownership of storage
|
||||
void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage) {
|
||||
// Caffe2 might have tensors whose storages are null, but we
|
||||
// don't allow it in PyTorch.
|
||||
AT_ASSERT(storage);
|
||||
|
||||
// We used to allow this, but this breaks device caching.
|
||||
// Let's put an actual error message for this one.
|
||||
TORCH_CHECK(tensor->storage().device() == storage->device(),
|
||||
"Attempted to set the storage of a tensor on device \"", tensor->storage().device(),
|
||||
"\" to a storage on different device \"", storage->device(),
|
||||
"\". This is no longer allowed; the devices must match.");
|
||||
tensor->set_storage_keep_dtype(
|
||||
at::Storage(c10::intrusive_ptr<THStorage>::reclaim(storage)));
|
||||
}
|
||||
|
|
|
|||
|
|
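Aside (not part of the diff): the THTensor_resize / THTensor_resizeNd bodies in the hunk above simply forward to at::native::resize_impl_cpu_, which is why they are candidates for removal. A minimal sketch, assuming only the public ATen API, of the equivalent call:

// Minimal sketch of the ATen-level equivalent of THTensor_resize.
#include <ATen/ATen.h>

void resizeExample() {
  at::Tensor t = at::zeros({2, 3});
  t.resize_({4, 5});  // roughly what THTensor_resize(self, {4, 5}, {}) did
  TORCH_CHECK(t.size(0) == 4 && t.size(1) == 5);
}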
@@ -2,7 +2,6 @@
#define TH_TENSOR_INC

#include <TH/THStorageFunctions.h>
#include <TH/THTensorApply.h>

#define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)
@@ -21,20 +20,4 @@
|
|||
|
||||
#include <TH/generic/THTensor.h>
|
||||
#include <TH/THGenerateBFloat16Type.h>
|
||||
|
||||
/* maths */
|
||||
#include <TH/generic/THTensorMath.h>
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include <TH/generic/THTensorMath.h>
|
||||
#include <TH/THGenerateBoolType.h>
|
||||
|
||||
#include <TH/generic/THTensorMath.h>
|
||||
#include <TH/THGenerateHalfType.h>
|
||||
|
||||
#include <TH/generic/THTensorMath.h>
|
||||
#include <TH/THGenerateBFloat16Type.h>
|
||||
|
||||
#include <TH/generic/THTensorMath.h>
|
||||
#include <TH/THGenerateComplexTypes.h>
|
||||
#endif
|
||||
|
|
|
|||
|
|
@@ -82,14 +82,6 @@ inline int64_t THTensor_sizeLegacyNoScalars(const THTensor *self, int dim)
|
|||
return self->dim() == 0 ? 1 : self->size(dim);
|
||||
}
|
||||
|
||||
#include <TH/generic/THTensorFastGetSet.hpp>
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include <TH/generic/THTensorFastGetSet.hpp>
|
||||
#include <TH/THGenerateComplexTypes.h>
|
||||
|
||||
#include <TH/generic/THTensorFastGetSet.hpp>
|
||||
#include <TH/THGenerateBFloat16Type.h>
|
||||
|
||||
inline std::vector<int64_t> THTensor_sizesLegacyNoScalars(const THTensor *self) {
|
||||
if (self->dim() == 0) {
|
||||
|
|
@@ -98,20 +90,7 @@ inline std::vector<int64_t> THTensor_sizesLegacyNoScalars(const THTensor *self)
|
|||
return self->sizes().vec();
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int64_t> THTensor_stridesLegacyNoScalars(const THTensor *self) {
|
||||
if (self->dim() == 0) {
|
||||
return {1};
|
||||
} else {
|
||||
return self->strides().vec();
|
||||
}
|
||||
}
|
||||
|
||||
// NB: Steals ownership of storage
|
||||
TH_API void THTensor_stealAndSetStoragePtr(THTensor* tensor, THStorage* storage);
|
||||
|
||||
TH_API void THTensor_free(THTensor *self);
|
||||
TH_API void THTensor_resizeNd(THTensor *self, int nDimension, const int64_t *size, const int64_t *stride);
|
||||
|
||||
TH_CPP_API void THTensor_resize(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride);
|
||||
TH_CPP_API void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_);
|
||||
|
|
|
|||
|
|
@@ -1,309 +0,0 @@
|
|||
#ifndef TH_TENSOR_APPLY_INC
|
||||
#define TH_TENSOR_APPLY_INC
|
||||
|
||||
#include <ATen/Parallel.h>
|
||||
|
||||
/*
|
||||
* The basic strategy for apply is as follows:
|
||||
*
|
||||
* 1. Starting with the outermost index, loop until we reach a dimension where the
|
||||
* data is no longer contiguous, i.e. the stride at that dimension is not equal to
|
||||
* the size of the tensor defined by the outer dimensions. Let's call this outer
|
||||
* (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal
|
||||
* to the entire Tensor. Let's call the inner tensor B.
|
||||
*
|
||||
* 2. We loop through the indices in B, starting at its outermost dimension. For
|
||||
* example, if B is a 2x2 matrix, then we do:
|
||||
*
|
||||
* B[0][0]
|
||||
* B[0][1]
|
||||
* B[1][0]
|
||||
* B[1][1]
|
||||
*
|
||||
* We set the offset into the underlying storage as (storageOffset + stride_B * index_B),
|
||||
* i.e. basically we compute the offset into the storage as we would normally for a
|
||||
* Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we
|
||||
* can simply loop for sizeof(A) iterations and perform the operation, without having to
|
||||
* follow the order described by the strides of A.
|
||||
*
|
||||
* 3. As an optimization, we merge dimensions of A that are contiguous in memory. For
|
||||
* example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two
|
||||
* dimensions can be merged for the purposes of APPLY, reducing the number of nested
|
||||
* loops.
|
||||
*/
|
||||
|
||||
#define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \
|
||||
TYPE *TENSOR##_data = NULL; \
|
||||
int64_t *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \
|
||||
int64_t TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \
|
||||
int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \
|
||||
TENSOR##_n = 1; \
|
||||
for(TENSOR##_i = 0; TENSOR##_i < TENSOR->dim(); TENSOR##_i++) \
|
||||
TENSOR##_n *= TENSOR->size(TENSOR##_i); \
|
||||
\
|
||||
if(TENSOR->is_empty()) \
|
||||
TH_TENSOR_APPLY_hasFinished = 1; \
|
||||
else \
|
||||
{ \
|
||||
TENSOR##_data = THTensor_getStoragePtr(TENSOR)->data<TYPE>()+TENSOR->storage_offset(); \
|
||||
TENSOR##_size = 1; \
|
||||
TENSOR##_stride = 1; \
|
||||
for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-1; TENSOR##_i >= 0; TENSOR##_i--) { \
|
||||
if(THTensor_sizeLegacyNoScalars(TENSOR, TENSOR##_i) != 1) { \
|
||||
if(THTensor_strideLegacyNoScalars(TENSOR, TENSOR##_i) == TENSOR##_size && TENSOR##_i != DIM) \
|
||||
TENSOR##_size *= THTensor_sizeLegacyNoScalars(TENSOR, TENSOR##_i); \
|
||||
else{ \
|
||||
TENSOR##_contiguous = 0; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (!TENSOR##_contiguous) { \
|
||||
/* Find the dimension of contiguous sections */ \
|
||||
TENSOR##_dim = 1; \
|
||||
for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-2; TENSOR##_i >= 0; TENSOR##_i--) \
|
||||
{ \
|
||||
if(TENSOR->stride(TENSOR##_i) != TENSOR->stride(TENSOR##_i+1) * TENSOR->size(TENSOR##_i+1) || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \
|
||||
TENSOR##_dim++; \
|
||||
} \
|
||||
/* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \
|
||||
TENSOR##_counter = (int64_t*)THAlloc(sizeof(int64_t)*(3*TENSOR##_dim)); \
|
||||
TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \
|
||||
TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \
|
||||
TH_TENSOR_dim_index = TENSOR##_dim-1; \
|
||||
TENSOR##_dimOffset = (DIM == THTensor_nDimensionLegacyAll(TENSOR)-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \
|
||||
TENSOR##_sizes[TH_TENSOR_dim_index] = THTensor_sizeLegacyNoScalars(TENSOR, THTensor_nDimensionLegacyAll(TENSOR)-1); \
|
||||
TENSOR##_strides[TH_TENSOR_dim_index] = THTensor_strideLegacyNoScalars(TENSOR, THTensor_nDimensionLegacyAll(TENSOR)-1); \
|
||||
/* TENSOR##_counter tracks where we are in the storage. The offset into the */ \
|
||||
/* storage is given by storage_offset + (i * j), where i is the stride */ \
|
||||
/* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \
|
||||
for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \
|
||||
TENSOR##_counter[TENSOR##_i] = 0; \
|
||||
} \
|
||||
for(TENSOR##_i = THTensor_nDimensionLegacyAll(TENSOR)-2; TENSOR##_i >= 0; --TENSOR##_i) { \
|
||||
if (TENSOR->stride(TENSOR##_i) == TENSOR->stride(TENSOR##_i+1) * TENSOR->size(TENSOR##_i+1) && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \
|
||||
TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size(TENSOR##_i) * TENSOR##_sizes[TH_TENSOR_dim_index]; \
|
||||
if (DIM != THTensor_nDimensionLegacyAll(TENSOR)-1 && TENSOR##_i < DIM) \
|
||||
TENSOR##_dimOffset--; \
|
||||
} else { \
|
||||
--TH_TENSOR_dim_index; \
|
||||
TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size(TENSOR##_i); \
|
||||
TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride(TENSOR##_i); \
|
||||
} \
|
||||
} \
|
||||
/* Size of the inner most section */ \
|
||||
TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \
|
||||
/* Stride of the inner most section */ \
|
||||
TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \
|
||||
} \
|
||||
else{\
|
||||
TENSOR##_dim = 1;\
|
||||
TENSOR##_counter = (int64_t*)THAlloc(sizeof(int64_t)*3);\
|
||||
TENSOR##_sizes = TENSOR##_counter + 1;\
|
||||
TENSOR##_strides = TENSOR##_counter + 2;\
|
||||
TENSOR##_sizes[0] = TENSOR##_n;\
|
||||
TENSOR##_strides[0] = 1;\
|
||||
TENSOR##_size = TENSOR##_sizes[0];\
|
||||
TENSOR##_stride = TENSOR##_strides[0];\
|
||||
}\
|
||||
} \
|
||||
TENSOR##_i = 0;
|
||||
|
||||
#define __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \
|
||||
if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \
|
||||
{ \
|
||||
if(TENSOR##_contiguous) \
|
||||
break; \
|
||||
\
|
||||
if(TENSOR##_dim == 1) \
|
||||
break; \
|
||||
\
|
||||
/* Reset pointer to beginning of loop */ \
|
||||
TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \
|
||||
for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \
|
||||
{ \
|
||||
TENSOR##_counter[TENSOR##_i]++; \
|
||||
/* Jump ahread by the stride of this dimension */ \
|
||||
TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \
|
||||
\
|
||||
if(TENSOR##_counter[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]) \
|
||||
{ \
|
||||
if(TENSOR##_i == 0) \
|
||||
{ \
|
||||
TH_TENSOR_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* Reset the pointer to the beginning of the chunk defined by this dimension */ \
|
||||
TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \
|
||||
TENSOR##_counter[TENSOR##_i] = 0; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
break; \
|
||||
} \
|
||||
TENSOR##_i = 0; \
|
||||
} \
|
||||
|
||||
#define TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \
|
||||
{ \
|
||||
int TH_TENSOR_APPLY_hasFinished = 0; \
|
||||
int64_t TH_TENSOR_dim_index = 0; \
|
||||
__TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
|
||||
__TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
|
||||
__TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \
|
||||
\
|
||||
int elements_equal = 1; \
|
||||
if(TENSOR1##_n != TENSOR2##_n) { \
|
||||
elements_equal = 0; \
|
||||
} \
|
||||
else if(TENSOR1##_n != TENSOR3##_n) { \
|
||||
elements_equal = 0; \
|
||||
} \
|
||||
if (elements_equal == 0) { \
|
||||
AT_ERROR("inconsistent tensor size, expected ", \
|
||||
#TENSOR1, " ", TENSOR1->sizes(), ", ", \
|
||||
#TENSOR2, " ", TENSOR2->sizes(), " and ", \
|
||||
#TENSOR3, " ", TENSOR3->sizes(), " to have the same " \
|
||||
"number of elements, but got ", TENSOR1##_n, ", ", \
|
||||
TENSOR2##_n, " and ", TENSOR3##_n, " elements respectively"); \
|
||||
} \
|
||||
\
|
||||
while(!TH_TENSOR_APPLY_hasFinished) \
|
||||
{ \
|
||||
/* Loop through the inner most region of the Tensor */ \
|
||||
for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! */ \
|
||||
{ \
|
||||
CODE \
|
||||
} \
|
||||
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
|
||||
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
|
||||
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \
|
||||
} \
|
||||
if(TENSOR1##_counter != NULL) \
|
||||
THFree(TENSOR1##_counter); \
|
||||
if(TENSOR2##_counter != NULL) \
|
||||
THFree(TENSOR2##_counter); \
|
||||
if(TENSOR3##_counter != NULL) \
|
||||
THFree(TENSOR3##_counter); \
|
||||
}
|
||||
|
||||
#define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
|
||||
TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE)
|
||||
|
||||
#define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \
|
||||
{ \
|
||||
int TH_TENSOR_APPLY_hasFinished = 0; \
|
||||
int64_t TH_TENSOR_dim_index = 0; \
|
||||
__TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
|
||||
__TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
|
||||
\
|
||||
if(TENSOR1##_n != TENSOR2##_n) { \
|
||||
AT_ERROR("inconsistent tensor size, expected ", \
|
||||
#TENSOR1, " ", TENSOR1->sizes(), " and ", \
|
||||
#TENSOR2, " ", TENSOR2->sizes(), \
|
||||
" to have the same number of elements, but got ", \
|
||||
TENSOR1##_n, " and ", TENSOR2##_n, " elements respectively"); \
|
||||
} \
|
||||
while(!TH_TENSOR_APPLY_hasFinished) \
|
||||
{ \
|
||||
/* Loop through the inner most region of the Tensor */ \
|
||||
for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \
|
||||
{ \
|
||||
CODE \
|
||||
} \
|
||||
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
|
||||
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
|
||||
} \
|
||||
if(TENSOR1##_counter != NULL) \
|
||||
THFree(TENSOR1##_counter); \
|
||||
if(TENSOR2##_counter != NULL) \
|
||||
THFree(TENSOR2##_counter); \
|
||||
}
|
||||
|
||||
#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
|
||||
TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE)
|
||||
|
||||
#define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \
|
||||
{ \
|
||||
int TH_TENSOR_APPLY_hasFinished = 0; \
|
||||
int64_t TH_TENSOR_dim_index = 0; \
|
||||
__TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \
|
||||
\
|
||||
while(!TH_TENSOR_APPLY_hasFinished) \
|
||||
{ \
|
||||
/* Loop through the inner most region of the Tensor */ \
|
||||
for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \
|
||||
{ \
|
||||
CODE \
|
||||
} \
|
||||
__TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \
|
||||
} \
|
||||
THFree(TENSOR##_counter); \
|
||||
}
|
||||
|
||||
#define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \
|
||||
TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE)
|
||||
|
||||
|
||||
/*
|
||||
* Calcuate the memory offset of an element in a tensor. The strategy is below:
|
||||
*
|
||||
* 1. convert the line index(the index of the element) to the indexs(coordinates) in the tensor.
|
||||
* It can hinted by a classical problem: Getting each individual digit from a whole integer(Decimal base).
|
||||
* A N-digit decimal base number could be view as a N-dimension tensor and the sizes of the tensor are 10.
|
||||
* So the value the whole integer is the line index. And the digits could be viewed as the indexes in
|
||||
* different dimensions.
|
||||
*
|
||||
* 2. convert the indexs(coordinates) in the tensor to the memory offset.
|
||||
*
|
||||
* You can get the detailes in the for-statement iterations.
|
||||
*
|
||||
* The macro is only used in the first element in each thread. For the rest, the memory offset could update
|
||||
* according to info of the tensor in order to get better performance. So we should also record the each
|
||||
* indexs in coresponding dimension of first element.
|
||||
* The recorded info is stored in the TENSOR##_counter_tmp.
|
||||
*
|
||||
*/
|
||||
#define __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR) \
|
||||
int64_t *TENSOR##_counter_tmp = (int64_t*)THAlloc(sizeof(int64_t) * TENSOR##_dim); \
|
||||
ptrdiff_t TENSOR##_memory_offset = 0; \
|
||||
ptrdiff_t TENSOR##_quot = line_index_start; \
|
||||
for (TENSOR##_i = TENSOR##_dim-1; TENSOR##_i>=0; --TENSOR##_i) { \
|
||||
TENSOR##_counter_tmp[TENSOR##_i] = TENSOR##_quot%TENSOR##_sizes[TENSOR##_i]; \
|
||||
TENSOR##_quot /= TENSOR##_sizes[TENSOR##_i]; \
|
||||
TENSOR##_memory_offset += TENSOR##_counter_tmp[TENSOR##_i] * TENSOR##_strides[TENSOR##_i]; \
|
||||
}
|
||||
|
||||
/*
|
||||
* The macro update the indexes in each dimension of the elements except for the first one allocated in
|
||||
* each thread.
|
||||
* For a tensor, if the index of some dimension reaches the size of the corresponding dimension. It will carry and clear.
|
||||
* If the index of next high dimension does do, the index of next high dimension should carry and clear, too.
|
||||
*
|
||||
* The momery offset calculatation is a little confusing. If current index carries, the current index is set to 0. So
|
||||
* the offset should decrease by size*stride of the last dimension. Then the index next high dimension increases by 1. So
|
||||
* the offset should increase by stride of next high dimension.
|
||||
*/
|
||||
#define __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR) \
|
||||
if(TENSOR##_i == TENSOR##_size && TENSOR##_dim > 1){ /*reaches the edge*/ \
|
||||
int TENSOR##_carry_coord = 1; /*set carry flag to true*/ \
|
||||
TENSOR##_start = 0; /*the current index be cleared to 0*/\
|
||||
TENSOR##_data -= TENSOR##_size * TENSOR##_stride; /*the momery offset reset to the first one in current dimension */\
|
||||
for(TENSOR##_i = TENSOR##_dim - 2; (TENSOR##_i >= 0) && (TENSOR##_carry_coord); TENSOR##_i--){ \
|
||||
TENSOR##_counter_tmp[TENSOR##_i]++; /*the index of next high dimension update*/ \
|
||||
TENSOR##_data += TENSOR##_strides[TENSOR##_i]; /*memory offset increase by stride of next high dimension*/\
|
||||
if(TENSOR##_counter_tmp[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]){ /*The next high dimension also carry, continue
|
||||
to clear and carry*/ \
|
||||
TENSOR##_data -= TENSOR##_sizes[TENSOR##_i] * TENSOR##_strides[TENSOR##_i]; \
|
||||
TENSOR##_counter_tmp[TENSOR##_i] = 0; \
|
||||
} else { \
|
||||
TENSOR##_carry_coord = 0; \
|
||||
} \
|
||||
} \
|
||||
} else { \
|
||||
TENSOR##_start = TENSOR##_i; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
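Aside (not part of the diff): the deleted TH_TENSOR_APPLY machinery above walks a strided tensor with per-dimension counters, collapsing contiguous runs where it can. A standalone C++ sketch (no TH macros, contiguity collapsing omitted) of the core counter-and-strides traversal, shown on a transposed view:

// Standalone sketch of the counter-based strided traversal implemented by the
// deleted TH_TENSOR_APPLY macros.
#include <cstdint>
#include <cstdio>
#include <vector>

void applyStrided(float* data, const std::vector<int64_t>& sizes,
                  const std::vector<int64_t>& strides, void (*fn)(float&)) {
  const size_t ndim = sizes.size();  // assumes ndim >= 1 and no empty dims
  std::vector<int64_t> counter(ndim, 0);
  float* ptr = data;
  bool finished = false;
  while (!finished) {
    fn(*ptr);
    // Bump the innermost counter; when a dimension is exhausted, rewind its
    // pointer contribution, reset its counter, and carry into the next one.
    int64_t d = static_cast<int64_t>(ndim) - 1;
    for (; d >= 0; d--) {
      counter[d]++;
      if (counter[d] < sizes[d]) {
        ptr += strides[d];
        break;
      }
      counter[d] = 0;
      ptr -= (sizes[d] - 1) * strides[d];
    }
    if (d < 0) finished = true;  // carried past the outermost dimension: done
  }
}

int main() {
  float buf[6] = {0, 1, 2, 3, 4, 5};
  // A transposed 2x3 view of buf: sizes {3, 2}, strides {1, 3}.
  applyStrided(buf, {3, 2}, {1, 3}, [](float& x) { std::printf("%g ", x); });
  std::printf("\n");  // prints: 0 3 1 4 2 5
}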
|
@@ -1,329 +0,0 @@
|
|||
#ifndef TH_TENSOR_DIM_APPLY_INC
|
||||
#define TH_TENSOR_DIM_APPLY_INC
|
||||
|
||||
// This is an example of SIZE_CHECK argument passable to TH_TENSOR_DIM_APPLY3.
|
||||
// The TENSOR1, TENSOR2, TENSOR3, DIMENSION will be expanded the same way as
|
||||
// TH_TENSOR_DIM_APPLY3.
|
||||
// Specifically, this check ensures that TENSOR1, TENSOR2, TENSOR3 have same
|
||||
// size except for DIMENSION.
|
||||
#define TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM(TENSOR1, TENSOR2, TENSOR3, DIMENSION) \
|
||||
{ \
|
||||
int shape_check_flag = 0; \
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
|
||||
{ \
|
||||
if (TH_TENSOR_DIM_APPLY_i == DIMENSION) \
|
||||
continue; \
|
||||
if (TENSOR1->size(TH_TENSOR_DIM_APPLY_i) != TENSOR2->size(TH_TENSOR_DIM_APPLY_i)) { \
|
||||
shape_check_flag = 1; \
|
||||
break; \
|
||||
} \
|
||||
if(TENSOR1->size(TH_TENSOR_DIM_APPLY_i) != TENSOR3->size(TH_TENSOR_DIM_APPLY_i)) { \
|
||||
shape_check_flag = 1; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
if (shape_check_flag == 1) { \
|
||||
AT_ERROR("Expected ", #TENSOR1, " ", TENSOR1->sizes(), ", ", #TENSOR2, " ", TENSOR2->sizes(), " and ", #TENSOR3, " ", TENSOR3->sizes(), " to have the same size apart from dimension ", DIMENSION); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define TH_TENSOR_DIM_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIMENSION, SIZE_CHECK, CODE) \
|
||||
{ \
|
||||
TYPE1 *TENSOR1##_data = NULL; \
|
||||
TH_UNUSED int64_t TENSOR1##_stride = 0, TENSOR1##_size = 0; \
|
||||
TYPE2 *TENSOR2##_data = NULL; \
|
||||
TH_UNUSED int64_t TENSOR2##_stride = 0, TENSOR2##_size = 0; \
|
||||
TYPE3 *TENSOR3##_data = NULL; \
|
||||
TH_UNUSED int64_t TENSOR3##_stride = 0, TENSOR3##_size = 0; \
|
||||
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
|
||||
int TH_TENSOR_DIM_APPLY_hasFinished = THTensor_(numel)(TENSOR1) == 0; \
|
||||
int TH_TENSOR_DIM_APPLY_i; \
|
||||
\
|
||||
if( (DIMENSION < 0) || (DIMENSION >= THTensor_nDimensionLegacyNoScalars(TENSOR1)) ) \
|
||||
THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, THTensor_nDimensionLegacyNoScalars(TENSOR1)); \
|
||||
int same_dims = 1; \
|
||||
if( THTensor_nDimensionLegacyNoScalars(TENSOR1) != THTensor_nDimensionLegacyNoScalars(TENSOR2) ) { \
|
||||
same_dims = 0; \
|
||||
} \
|
||||
if( THTensor_nDimensionLegacyNoScalars(TENSOR1) != THTensor_nDimensionLegacyNoScalars(TENSOR3) ) { \
|
||||
same_dims = 0; \
|
||||
} \
|
||||
if (same_dims == 0) { \
|
||||
AT_ERROR("inconsistent tensor size, expected ", #TENSOR1, " ", TENSOR1->sizes(), ", ", #TENSOR2, " ", TENSOR2->sizes(), " and ", #TENSOR3, " ",TENSOR3->sizes() , " to have the same number of dimensions"); \
|
||||
} \
|
||||
SIZE_CHECK(TENSOR1, TENSOR2, TENSOR3, DIMENSION) \
|
||||
\
|
||||
if (TH_TENSOR_DIM_APPLY_hasFinished) { \
|
||||
return; \
|
||||
} \
|
||||
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(THTensor_nDimensionLegacyNoScalars(TENSOR1))); \
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
|
||||
\
|
||||
TENSOR1##_data = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+(TENSOR1)->storage_offset(); \
|
||||
TENSOR1##_stride = THTensor_strideLegacyNoScalars((TENSOR1), DIMENSION); \
|
||||
TENSOR1##_size = THTensor_sizeLegacyNoScalars((TENSOR1), DIMENSION); \
|
||||
\
|
||||
TENSOR2##_data = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+(TENSOR2)->storage_offset(); \
|
||||
TENSOR2##_stride = THTensor_strideLegacyNoScalars((TENSOR2), DIMENSION); \
|
||||
TENSOR2##_size = THTensor_sizeLegacyNoScalars((TENSOR2), DIMENSION); \
|
||||
\
|
||||
TENSOR3##_data = THTensor_getStoragePtr(TENSOR3)->data<TYPE3>()+(TENSOR3)->storage_offset(); \
|
||||
TENSOR3##_stride = THTensor_strideLegacyNoScalars((TENSOR3), DIMENSION); \
|
||||
TENSOR3##_size = THTensor_sizeLegacyNoScalars((TENSOR3), DIMENSION); \
|
||||
\
|
||||
while(!TH_TENSOR_DIM_APPLY_hasFinished) \
|
||||
{ \
|
||||
CODE \
|
||||
\
|
||||
if(THTensor_nDimensionLegacyNoScalars(TENSOR1) == 1) \
|
||||
break; \
|
||||
\
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
|
||||
{ \
|
||||
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
continue; \
|
||||
} \
|
||||
\
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
|
||||
TENSOR1##_data += THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
|
||||
TENSOR2##_data += THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
|
||||
TENSOR3##_data += THTensor_strideLegacyNoScalars(TENSOR3, TH_TENSOR_DIM_APPLY_i); \
|
||||
\
|
||||
if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i)) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
|
||||
{ \
|
||||
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
|
||||
TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
|
||||
TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR3, TH_TENSOR_DIM_APPLY_i); \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
THFree(TH_TENSOR_DIM_APPLY_counter); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor
|
||||
* and one for the second. The two tensors must have the same shape, other than at the
|
||||
* specified DIMENSION. This function makes it easy to store the output from reducing the
|
||||
* TENSOR at index. For example, in the sum example described below, we could instead do:
|
||||
*
|
||||
* int64_t i = 0;
|
||||
* TYPE1 sum;
|
||||
*
|
||||
* for (i = 0; i < TENSOR1##_size; ++i) {
|
||||
* sum += TENSOR1##_data[i * TENSOR1##_stride]
|
||||
* }
|
||||
* *TENSOR2##_data = (TYPE2) sum;
|
||||
*
|
||||
* In particular, we guarantee that the offset into TENSOR2 will be what you would get if
|
||||
* you applied all of the index values used to generate the offset into TENSOR1.
|
||||
*/
|
||||
#define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \
|
||||
{ \
|
||||
TYPE1 *TENSOR1##_data = NULL; \
|
||||
TH_UNUSED int64_t TENSOR1##_stride = 0, TENSOR1##_size = 0; \
|
||||
TYPE2 *TENSOR2##_data = NULL; \
|
||||
TH_UNUSED int64_t TENSOR2##_stride = 0, TENSOR2##_size = 0; \
|
||||
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
|
||||
int TH_TENSOR_DIM_APPLY_hasFinished = THTensor_(numel)(TENSOR1) == 0; \
|
||||
int TH_TENSOR_DIM_APPLY_i; \
|
||||
\
|
||||
if( (DIMENSION < 0) || (DIMENSION >= THTensor_nDimensionLegacyNoScalars(TENSOR1)) ) \
|
||||
THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, THTensor_nDimensionLegacyAll(TENSOR1)); \
|
||||
if( THTensor_nDimensionLegacyNoScalars(TENSOR1) != THTensor_nDimensionLegacyNoScalars(TENSOR2)) { \
|
||||
AT_ERROR("inconsistent tensor size, expected ", #TENSOR1, " ", TENSOR1->sizes(), " and ", #TENSOR2, " ", TENSOR2->sizes(), " to have the same number of dimensions"); \
|
||||
} \
|
||||
TH_UNUSED int shape_check_flag = 0; \
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
|
||||
continue; \
|
||||
if(THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i) != THTensor_sizeLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i)) { \
|
||||
AT_ERROR("Expected ", #TENSOR1, " ", TENSOR1->sizes(), " and ", #TENSOR2, " ", TENSOR2->sizes(), " to have the same size in dimension ", DIMENSION); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if (TH_TENSOR_DIM_APPLY_hasFinished) { \
|
||||
return; \
|
||||
} \
|
||||
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(THTensor_nDimensionLegacyNoScalars(TENSOR1))); \
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
|
||||
\
|
||||
TENSOR1##_data = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+(TENSOR1)->storage_offset(); \
|
||||
TENSOR1##_stride = THTensor_strideLegacyNoScalars((TENSOR1), DIMENSION); \
|
||||
TENSOR1##_size = THTensor_sizeLegacyNoScalars(TENSOR1, DIMENSION); \
|
||||
\
|
||||
TENSOR2##_data = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+(TENSOR2)->storage_offset(); \
|
||||
TENSOR2##_stride = THTensor_strideLegacyNoScalars((TENSOR2), DIMENSION); \
|
||||
TENSOR2##_size = THTensor_sizeLegacyNoScalars(TENSOR2, DIMENSION); \
|
||||
\
|
||||
while(!TH_TENSOR_DIM_APPLY_hasFinished) \
|
||||
{ \
|
||||
CODE \
|
||||
\
|
||||
if(THTensor_nDimensionLegacyNoScalars(TENSOR1) == 1) \
|
||||
break; \
|
||||
\
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyNoScalars(TENSOR1); TH_TENSOR_DIM_APPLY_i++) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
|
||||
{ \
|
||||
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
continue; \
|
||||
} \
|
||||
\
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
|
||||
TENSOR1##_data += THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
|
||||
TENSOR2##_data += THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
|
||||
\
|
||||
if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i)) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyNoScalars(TENSOR1)-1) \
|
||||
{ \
|
||||
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
|
||||
TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i); \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
THFree(TH_TENSOR_DIM_APPLY_counter); \
|
||||
}
|
||||
|
||||
/**
|
||||
* The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored
|
||||
* at all sets of dimension values other than DIMENSION, such that we can get all the values at those
|
||||
* fixed indices for the various values at DIMENSION.
|
||||
*
|
||||
* Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the
|
||||
* pointer into storage will be at:
|
||||
*
|
||||
* A[0][0]
|
||||
* A[0][1]
|
||||
* A[0][2]
|
||||
* A[1][0]
|
||||
* A[1][1]
|
||||
* A[1][2]
|
||||
*
|
||||
* And at each point, we can access the data for each of the four elements of the Tensor via
|
||||
* TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do:
|
||||
*
|
||||
* int64_t i = 0;
|
||||
* TYPE sum;
|
||||
* for (i = 0; i < TENSOR##_size; i++) {
|
||||
* sum += TENSOR##_data[i * TENSOR##_stride]
|
||||
* }
|
||||
*
|
||||
* Note that we don't have to have DIMENSION be the last tensor. If we have DIMENSION=1, then we will hit the
|
||||
* code (2x4) times, with pointer into the storage at:
|
||||
*
|
||||
* offset +
|
||||
* stride_0 * 0 + stride_2 * 0
|
||||
* stride_0 * 1 + stride_2 * 0
|
||||
* stride_0 * 0 + stride_2 * 1
|
||||
* stride_0 * 1 + stride_2 * 1
|
||||
* stride_0 * 0 + stride_2 * 2
|
||||
* stride_0 * 1 + stride_2 * 2
|
||||
* stride_0 * 0 + stride_2 * 3
|
||||
* stride_0 * 1 + stride_2 * 3
|
||||
*
|
||||
* So we can again sum over the values at DIMENSION with the other indices fixed.
|
||||
*/
|
||||
#define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \
|
||||
{ \
|
||||
TYPE *TENSOR##_data = NULL; \
|
||||
int64_t TENSOR##_stride = 0, TENSOR##_size = 0; \
|
||||
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
|
||||
int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
|
||||
int TH_TENSOR_DIM_APPLY_i; \
|
||||
\
|
||||
if( (DIMENSION < 0) || (DIMENSION >= THTensor_nDimensionLegacyAll(TENSOR)) ) \
|
||||
THError("invalid dimension"); \
|
||||
\
|
||||
TENSOR##_data = THTensor_getStoragePtr(TENSOR)->data<TYPE>()+(TENSOR)->storage_offset(); \
|
||||
TENSOR##_stride = THTensor_strideLegacyNoScalars((TENSOR), DIMENSION); \
|
||||
TENSOR##_size = THTensor_sizeLegacyNoScalars(TENSOR, DIMENSION); \
|
||||
/* Counter stores the indices into the Tensor at any time */ \
|
||||
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(THTensor_nDimensionLegacyAll(TENSOR))); \
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyAll(TENSOR); TH_TENSOR_DIM_APPLY_i++) \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
|
||||
\
|
||||
while(!TH_TENSOR_DIM_APPLY_hasFinished) \
|
||||
{ \
|
||||
CODE \
|
||||
\
|
||||
if(THTensor_nDimensionLegacyAll(TENSOR) == 1) \
|
||||
break; \
|
||||
\
|
||||
for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyAll(TENSOR); TH_TENSOR_DIM_APPLY_i++) \
|
||||
{ \
|
||||
/* Check if the index is equal to DIMENSION. We don't need to update the */ \
|
||||
/* offset if this is the case, and can consider the next index. However, */ \
|
||||
/* in the case that the DIMENSION is the last index in the Tensor, then */ \
|
||||
/* we have parsed the entire tensor and can exit */ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
|
||||
{ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyAll(TENSOR)-1) \
|
||||
{ \
|
||||
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
continue; \
|
||||
} \
|
||||
\
|
||||
/* Bump the counter at this index, update the pointer */ \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
|
||||
TENSOR##_data += THTensor_strideLegacyNoScalars(TENSOR, TH_TENSOR_DIM_APPLY_i); \
|
||||
\
|
||||
if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == THTensor_sizeLegacyNoScalars(TENSOR, TH_TENSOR_DIM_APPLY_i)) \
|
||||
{ \
|
||||
/* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. If this is the last dimension, exit */ \
|
||||
if(TH_TENSOR_DIM_APPLY_i == THTensor_nDimensionLegacyAll(TENSOR)-1) \
|
||||
{ \
|
||||
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
|
||||
break; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \
|
||||
TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*THTensor_strideLegacyNoScalars(TENSOR, TH_TENSOR_DIM_APPLY_i); \
|
||||
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
THFree(TH_TENSOR_DIM_APPLY_counter); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
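Aside (not part of the diff): the deleted TH_TENSOR_DIM_APPLY macros above visit every combination of indices except the chosen DIMENSION, exposing that dimension's size and stride to CODE (their comment's running example is a per-slice sum). A standalone C++ sketch of the same per-dimension reduction pattern:

// Standalone sketch of the per-dimension reduction supported by the deleted
// TH_TENSOR_DIM_APPLY macros: for each combination of the other indices,
// reduce along one chosen dimension using its stride.
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<float> sumAlongDim(const float* data, const std::vector<int64_t>& sizes,
                               const std::vector<int64_t>& strides, size_t dim) {
  std::vector<float> out;
  const size_t ndim = sizes.size();
  std::vector<int64_t> counter(ndim, 0);
  bool finished = false;
  while (!finished) {
    // Offset of the first element of this slice; the index at `dim` stays 0.
    int64_t offset = 0;
    for (size_t d = 0; d < ndim; d++) offset += counter[d] * strides[d];
    float sum = 0;
    for (int64_t i = 0; i < sizes[dim]; i++) sum += data[offset + i * strides[dim]];
    out.push_back(sum);
    // Advance the counters over every dimension except `dim`.
    int64_t d = static_cast<int64_t>(ndim) - 1;
    for (; d >= 0; d--) {
      if (static_cast<size_t>(d) == dim) continue;
      if (++counter[d] < sizes[d]) break;
      counter[d] = 0;
    }
    if (d < 0) finished = true;
  }
  return out;
}

int main() {
  // Contiguous 2x3 matrix; summing along dim 1 gives the two row sums.
  float m[6] = {1, 2, 3, 4, 5, 6};
  auto sums = sumAlongDim(m, {2, 3}, {3, 1}, 1);
  for (float s : sums) std::printf("%g ", s);  // prints: 6 15
  std::printf("\n");
}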
@@ -1,20 +0,0 @@
#include <TH/THTensor.hpp>
#include <TH/THVector.h>
#include <TH/THBlas.h>
#include <TH/THTensorDimApply.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateAllTypes.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateBoolType.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateBFloat16Type.h>

// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THTensorMoreMath.cpp>
#include <TH/THGenerateHalfType.h>
@@ -1,24 +0,0 @@
#ifndef TH_VECTOR_INC
#define TH_VECTOR_INC

#include <TH/THGeneral.h>
#define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME)

/* We are going to use dynamic dispatch, and want only to generate declarations
 * of the vector functions */
#include <TH/generic/THVector.h>
#include <TH/THGenerateAllTypes.h>

#include <TH/generic/THVector.h>
#include <TH/THGenerateHalfType.h>

#include <TH/generic/THVector.h>
#include <TH/THGenerateBoolType.h>

#include <TH/generic/THVector.h>
#include <TH/THGenerateBFloat16Type.h>

#include <TH/generic/THVector.h>
#include <TH/THGenerateComplexTypes.h>

#endif // TH_VECTOR_INC
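Aside (not part of the diff): the deleted THVector.h shows the TH "generate" pattern, in which one generic header is re-included once per scalar type behind a THGenerate*Type(s).h shim that defines Real and scalar_t before each pass. A self-contained C++ sketch of the same idea; here the generic snippet is a macro rather than a re-included file, and Real is passed explicitly:

// Sketch of the per-type generation pattern behind THVector_(NAME): one
// generic definition expanded once per scalar type, yielding a family of
// functions such as THFloatVector_fill and THDoubleVector_fill.
#include <cstddef>
#include <cstdio>

#define TH_CONCAT_4_EXPAND(a, b, c, d) a##b##c##d
#define TH_CONCAT_4(a, b, c, d) TH_CONCAT_4_EXPAND(a, b, c, d)
#define THVector_(Real, NAME) TH_CONCAT_4(TH, Real, Vector_, NAME)

// "Generic" definition, parameterised by the scalar type and its type name.
#define DEFINE_VECTOR_FILL(scalar_t, Real)                        \
  void THVector_(Real, fill)(scalar_t* x, scalar_t v, size_t n) { \
    for (size_t i = 0; i < n; i++) x[i] = v;                      \
  }

DEFINE_VECTOR_FILL(float, Float)    // generates THFloatVector_fill
DEFINE_VECTOR_FILL(double, Double)  // generates THDoubleVector_fill

int main() {
  float f[3];
  double d[3];
  THFloatVector_fill(f, 1.5f, 3);
  THDoubleVector_fill(d, 2.5, 3);
  std::printf("%g %g\n", f[0], d[2]);  // prints: 1.5 2.5
}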
@@ -1,48 +0,0 @@
|
|||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "TH/generic/THBlas.cpp"
|
||||
#else
|
||||
|
||||
#ifdef BLAS_F2C
|
||||
# define ffloat double
|
||||
#else
|
||||
# define ffloat float
|
||||
#endif
|
||||
|
||||
TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy);
|
||||
TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy);
|
||||
|
||||
void THBlas_(swap)(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy)
|
||||
{
|
||||
if(n == 1)
|
||||
{
|
||||
incx = 1;
|
||||
incy = 1;
|
||||
}
|
||||
|
||||
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
|
||||
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
|
||||
{
|
||||
int i_n = (int)n;
|
||||
int i_incx = (int)incx;
|
||||
int i_incy = (int)incy;
|
||||
|
||||
#if defined(TH_REAL_IS_DOUBLE)
|
||||
dswap_(&i_n, x, &i_incx, y, &i_incy);
|
||||
#else
|
||||
sswap_(&i_n, x, &i_incx, y, &i_incy);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
{
|
||||
int64_t i;
|
||||
for(i = 0; i < n; i++)
|
||||
{
|
||||
scalar_t z = x[i*incx];
|
||||
x[i*incx] = y[i*incy];
|
||||
y[i*incy] = z;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@@ -1,8 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THBlas.h"
#else

/* Level 1 */
TH_API void THBlas_(swap)(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy);

#endif
@@ -8,50 +8,6 @@
|
|||
#include <ATen/NamedTensorUtils.h>
|
||||
#include <ATen/MemoryOverlap.h>
|
||||
|
||||
/**** access methods ****/
|
||||
THStorage *THTensor_(storage)(const THTensor *self)
|
||||
{
|
||||
return THTensor_getStoragePtr(self);
|
||||
}
|
||||
|
||||
ptrdiff_t THTensor_(storageOffset)(const THTensor *self)
|
||||
{
|
||||
return self->storage_offset();
|
||||
}
|
||||
|
||||
int THTensor_(nDimension)(const THTensor *self)
|
||||
{
|
||||
return THTensor_nDimension(self);
|
||||
}
|
||||
|
||||
int THTensor_(nDimensionLegacyNoScalars)(const THTensor *self)
|
||||
{
|
||||
return THTensor_nDimensionLegacyNoScalars(self);
|
||||
}
|
||||
|
||||
int THTensor_(nDimensionLegacyAll)(const THTensor *self)
|
||||
{
|
||||
return THTensor_nDimensionLegacyAll(self);
|
||||
}
|
||||
|
||||
int64_t THTensor_(size)(const THTensor *self, int dim)
|
||||
{
|
||||
THArgCheck((dim >= 0) && (dim < self->dim()), 2, "dimension %d out of range of %dD tensor",
|
||||
dim, THTensor_(nDimensionLegacyNoScalars)(self));
|
||||
return self->size(dim);
|
||||
}
|
||||
|
||||
int64_t THTensor_(stride)(const THTensor *self, int dim)
|
||||
{
|
||||
THArgCheck((dim >= 0) && (dim < self->dim()), 2, "dimension %d out of range of %dD tensor",
|
||||
dim, THTensor_(nDimensionLegacyNoScalars)(self));
|
||||
return self->stride(dim);
|
||||
}
|
||||
|
||||
scalar_t *THTensor_(data)(const THTensor *self) {
|
||||
return self->data<scalar_t>();
|
||||
}
|
||||
|
||||
/**** creation methods ****/
|
||||
|
||||
/* Empty init */
|
||||
|
|
@@ -69,12 +25,6 @@ THTensor *THTensor_(new)(void)
|
|||
.release();
|
||||
}
|
||||
|
||||
/* Pointer-copy init */
|
||||
THTensor *THTensor_(newWithTensor)(THTensor *tensor)
|
||||
{
|
||||
return at::native::alias(THTensor_wrap(tensor)).unsafeReleaseTensorImpl();
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset,
|
||||
int64_t size0, int64_t stride0)
|
||||
{
|
||||
|
|
@@ -94,442 +44,14 @@ THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffse
|
|||
return self;
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newWithSize1d)(int64_t size0)
|
||||
{
|
||||
THStorage *new_storage = THStorage_(new)();
|
||||
THTensor* self =
|
||||
c10::make_intrusive<at::TensorImpl, at::UndefinedTensorImpl>(
|
||||
c10::intrusive_ptr<at::StorageImpl>::reclaim(new_storage),
|
||||
at::DispatchKey::CPU,
|
||||
caffe2::TypeMeta::Make<scalar_t>())
|
||||
.release();
|
||||
THTensor_(setStorage)(self, new_storage, 0, {size0}, {});
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newClone)(THTensor *self)
|
||||
{
|
||||
// already available in Aten as at::clone()
|
||||
THTensor *tensor = THTensor_(new)();
|
||||
at::Tensor tensor_wrap = THTensor_wrap(tensor);
|
||||
at::Tensor self_wrap = THTensor_wrap(self);
|
||||
tensor_wrap.resize_as_(self_wrap);
|
||||
at::native::copy_(tensor_wrap, self_wrap, false);
|
||||
return tensor;
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newContiguous)(THTensor *self)
|
||||
{
|
||||
if(!THTensor_(isContiguous)(self))
|
||||
return THTensor_(newClone)(self);
|
||||
else
|
||||
{
|
||||
THTensor_(retain)(self);
|
||||
return self;
|
||||
}
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, int64_t sliceIndex_)
|
||||
{
|
||||
THTensor *self = THTensor_(newWithTensor)(tensor);
|
||||
THTensor_(select)(self, NULL, dimension_, sliceIndex_);
|
||||
return self;
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, int64_t firstIndex_, int64_t size_)
|
||||
{
|
||||
THTensor *self = THTensor_(newWithTensor)(tensor);
|
||||
THTensor_(narrow)(self, NULL, dimension_, firstIndex_, size_);
|
||||
return self;
|
||||
}
|
||||
|
||||
THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_)
|
||||
{
|
||||
THTensor *self = THTensor_(newWithTensor)(tensor);
|
||||
THTensor_(transpose)(self, NULL, dimension1_, dimension2_);
|
||||
return self;
|
||||
}
|
||||
|
||||
/* Resize */
|
||||
void THTensor_(resize)(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride)
|
||||
{
|
||||
return THTensor_resize(self, size, stride);
|
||||
}
|
||||
|
||||
void THTensor_(resizeAs)(THTensor *self, THTensor *src)
|
||||
{
|
||||
// already available in Aten as at::resize_as_()
|
||||
if(!THTensor_(isSameSizeAs)(self, src))
|
||||
THTensor_(resizeNd)(self, src->dim(), THTensor_getSizePtr(src), NULL);
|
||||
}
|
||||
|
||||
void THTensor_(resize0d)(THTensor *tensor)
|
||||
{
|
||||
THTensor_(resizeNd)(tensor, 0, {}, nullptr);
|
||||
}
|
||||
|
||||
void THTensor_(resize1d)(THTensor *tensor, int64_t size0)
|
||||
{
|
||||
int64_t size[1] = {size0};
|
||||
THTensor_(resizeNd)(tensor, 1, size, nullptr);
|
||||
}
|
||||
|
||||
void THTensor_(resize2d)(THTensor *tensor, int64_t size0, int64_t size1)
|
||||
{
|
||||
int64_t size[2] = {size0, size1};
|
||||
THTensor_(resizeNd)(tensor, 2, size, nullptr);
|
||||
}
|
||||
|
||||
void THTensor_(resize3d)(THTensor *tensor, int64_t size0, int64_t size1, int64_t size2)
|
||||
{
|
||||
int64_t size[3] = {size0, size1, size2};
|
||||
THTensor_(resizeNd)(tensor, 3, size, nullptr);
|
||||
}
|
||||
|
||||
void THTensor_(resize4d)(THTensor *self, int64_t size0, int64_t size1, int64_t size2, int64_t size3)
|
||||
{
|
||||
int64_t size[4] = {size0, size1, size2, size3};
|
||||
THTensor_(resizeNd)(self, 4, size, nullptr);
|
||||
}
|
||||
|
||||
void THTensor_(resize5d)(THTensor *self, int64_t size0, int64_t size1, int64_t size2, int64_t size3, int64_t size4)
|
||||
{
|
||||
int64_t size[5] = {size0, size1, size2, size3, size4};
|
||||
THTensor_(resizeNd)(self, 5, size, nullptr);
|
||||
}
|
||||
|
||||
void THTensor_(set)(THTensor *self, THTensor *src)
|
||||
{
|
||||
if(self != src)
|
||||
THTensor_(setStorage)(self,
|
||||
THTensor_getStoragePtr(src),
|
||||
src->storage_offset(),
|
||||
src->sizes(),
|
||||
src->strides());
|
||||
}
|
||||
|
||||
void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_)
|
||||
{
|
||||
THTensor_setStorage(self, storage_, storageOffset_, size_, stride_);
|
||||
}
|
||||
|
||||
void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension, int64_t firstIndex, int64_t size)
|
||||
{
|
||||
if(!src)
|
||||
src = self;
|
||||
|
||||
THArgCheck( (dimension >= 0) && (dimension < src->dim()), 2, "out of range");
|
||||
THArgCheck( firstIndex >= 0, 3, "out of range");
|
||||
THArgCheck( size >= 0, 4, "out of range");
|
||||
THArgCheck(firstIndex <= src->size(dimension) - size, 4, "out of range");
|
||||
|
||||
THTensor_(set)(self, src);
|
||||
|
||||
if (firstIndex > 0) {
|
||||
self->set_storage_offset(self->storage_offset() + firstIndex*self->stride(dimension));
|
||||
}
|
||||
|
||||
self->set_size(dimension, size);
|
||||
}

void THTensor_(select)(THTensor *self, THTensor *src, int dimension, int64_t sliceIndex)
{
  int d;

  if(!src)
    src = self;

  THArgCheck(src->dim() > 0, 1, "cannot select on a 0-dim tensor");
  THArgCheck((dimension >= 0) && (dimension < src->dim()), 2, "out of range");
  THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size(dimension)), 3, "out of range");

  THTensor_(set)(self, src);
  THTensor_(narrow)(self, NULL, dimension, sliceIndex, 1);

  at::DimVector newSize(static_cast<size_t>(self->dim()-1));
  at::DimVector newStride(static_cast<size_t>(self->dim()-1));
  for (d = 0; d < dimension; d++)
  {
    newSize[d] = self->size(d);
    newStride[d] = self->stride(d);
  }

  for(d = dimension; d < self->dim()-1; d++)
  {
    newSize[d] = self->size(d+1);
    newStride[d] = self->stride(d+1);
  }
  self->set_sizes_and_strides(newSize, newStride);
}

void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1, int dimension2)
{
  int64_t z;

  if(!src)
    src = self;

  THArgCheck( (dimension1 >= 0) && (dimension1 < THTensor_nDimensionLegacyNoScalars(src)), 1, "out of range");
  THArgCheck( (dimension2 >= 0) && (dimension2 < THTensor_nDimensionLegacyNoScalars(src)), 2, "out of range");

  THTensor_(set)(self, src);

  if(dimension1 == dimension2)
    return;

  z = self->stride(dimension1);
  self->set_stride(dimension1, self->stride(dimension2));
  self->set_stride(dimension2, z);
  z = self->size(dimension1);
  self->set_size(dimension1, self->size(dimension2));
  self->set_size(dimension2, z);
}

void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension)
{
  int d;

  if(!src)
    src = self;

  THArgCheck((dimension >= 0) && (dimension < src->dim()), 2, "dimension out of range");

  THTensor_(set)(self, src);

  if(src->size(dimension) == 1)
  {
    at::DimVector newSize(static_cast<size_t>(self->dim() - 1));
    at::DimVector newStride(static_cast<size_t>(self->dim() - 1));
    for (d = 0; d < dimension; d++)
    {
      newSize[d] = self->size(d);
      newStride[d] = self->stride(d);
    }

    for(d = dimension; d < self->dim()-1; d++)
    {
      newSize[d] = self->size(d+1);
      newStride[d] = self->stride(d+1);
    }
    self->set_sizes_and_strides(newSize, newStride);
  }
}

void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension)
{
  int d;

  if(!src)
    src = self;

  THArgCheck((dimension >= 0) && (dimension <= src->dim()), 2, "dimension out of range");

  THTensor_(set)(self, src);

  at::DimVector newSize(static_cast<size_t>(/* size */ self->dim()+1));
  at::DimVector newStride(static_cast<size_t>(/* size */ self->dim()+1));

  for(d = self->dim(); d > dimension; d--)
  {
    newSize[d] = self->size(d-1);
    newStride[d] = self->stride(d-1);
  }
  if (dimension < self->dim())
  {
    newStride[dimension] = self->size(dimension) * self->stride(dimension);
  }
  else
  {
    newStride[dimension] = 1;
  }
  newSize[dimension] = 1;
  for(d = dimension - 1; d >= 0; d--)
  {
    newSize[d] = self->size(d);
    newStride[d] = self->stride(d);
  }
  self->set_sizes_and_strides(newSize, newStride);
}

int THTensor_(isTransposed)(const THTensor *self)
{
  if (THTensor_(isContiguous)(self)) {
    return 0;
  }
  int64_t max_stride = 1;
  int64_t size_max_stride = 1;
  int64_t z = 1;
  int d;
  for (d = 0; d < self->dim(); ++d) {
    if (self->stride(d) == 0 && self->size(d) != 1)
      return 0;
    if (self->stride(d) > max_stride) {
      max_stride = self->stride(d);
      size_max_stride = self->size(d);
    }
    z *= self->size(d);
  }
  if (z == max_stride * size_max_stride) {
    return 1;
  }
  return 0;
}
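
An editorial aside: the loop above flags a non-contiguous view as "transposed" when the largest stride, times the size of its dimension, spans every element (the contiguous case has already returned). A hypothetical standalone version of the same test:

#include <cstdint>
#include <vector>

inline bool looks_transposed(const std::vector<int64_t>& sizes,
                             const std::vector<int64_t>& strides) {
  int64_t max_stride = 1, size_max_stride = 1, numel = 1;
  for (size_t d = 0; d < sizes.size(); ++d) {
    if (strides[d] == 0 && sizes[d] != 1) return false;
    if (strides[d] > max_stride) {
      max_stride = strides[d];
      size_max_stride = sizes[d];
    }
    numel *= sizes[d];
  }
  return numel == max_stride * size_max_stride;
}
// e.g. a 3x4 row-major matrix seen through transpose has sizes {4, 3} and
// strides {1, 4}; 12 == 4 * 3, so the view is reported as transposed.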

int THTensor_(isContiguous)(const THTensor *self)
{
  return self->is_contiguous();
}

int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor* src)
{
  int d;
  if (self->dim() != src->dim())
    return 0;
  for(d = 0; d < self->dim(); ++d)
  {
    if(self->size(d) != src->size(d))
      return 0;
  }
  return 1;
}

ptrdiff_t THTensor_(nElement)(const THTensor *self)
{
  if(THTensor_nDimensionLegacyAll(self) == 0)
    return 0;
  else
  {
    ptrdiff_t nElement = 1;
    int d;
    for(d = 0; d < THTensor_nDimension(self); d++)
      nElement *= self->size(d);
    return nElement;
  }
}

// NB: It is INVALID to call this on an UndefinedTensorImpl
void THTensor_(retain)(THTensor *self)
{
  c10::raw::intrusive_ptr::incref(self);
}

void THTensor_(free)(THTensor *self)
{
  THTensor_free(self);
}

void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst)
{
  if(self != dst) {
    at::Tensor dst_wrap = THTensor_wrap(dst);
    at::Tensor self_wrap = THTensor_wrap(self);
    at::native::copy_(dst_wrap, self_wrap, false);
  }

  THTensor_(free)(self);
}

/*******************************************************************************/

void THTensor_(resizeNd)(THTensor *self, int nDimension, const int64_t *size, const int64_t *stride)
{
  return THTensor_resizeNd(self, nDimension, size, stride);
}

void THTensor_(set0d)(THTensor *tensor, scalar_t value)
{
  THArgCheck(THTensor_nDimension(tensor) == 0, 1, "tensor must have no dimensions");
  THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset(), value);
}

scalar_t THTensor_(get0d)(const THTensor *tensor)
{
  THArgCheck(THTensor_nDimension(tensor) == 0, 1, "tensor must have no dimensions");
  return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset());
}

void THTensor_(set1d)(THTensor *tensor, int64_t x0, scalar_t value)
{
  THArgCheck(THTensor_nDimensionLegacyNoScalars(tensor) == 1, 1, "tensor must have one dimension");
  THArgCheck( (x0 >= 0) && (x0 < THTensor_sizeLegacyNoScalars(tensor, 0)), 2, "out of range");
  THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*THTensor_strideLegacyNoScalars(tensor, 0), value);
}

scalar_t THTensor_(get1d)(const THTensor *tensor, int64_t x0)
{
  THArgCheck(THTensor_nDimensionLegacyNoScalars(tensor) == 1, 1, "tensor must have one dimension");
  THArgCheck( (x0 >= 0) && (x0 < THTensor_sizeLegacyNoScalars(tensor, 0)), 2, "out of range");
  return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*THTensor_strideLegacyNoScalars(tensor, 0));
}

void THTensor_(set2d)(THTensor *tensor, int64_t x0, int64_t x1, scalar_t value)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 2, 1, "tensor must have two dimensions");
  THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)), 2, "out of range");
  THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1), value);
}

scalar_t THTensor_(get2d)(const THTensor *tensor, int64_t x0, int64_t x1)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 2, 1, "tensor must have two dimensions");
  THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)), 2, "out of range");
  return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1));
}

void THTensor_(set3d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, scalar_t value)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 3, 1, "tensor must have three dimensions");
  THArgCheck( (x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)), 2, "out of range");
  THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2), value);
}

scalar_t THTensor_(get3d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 3, 1, "tensor must have three dimensions");
  THArgCheck( (x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)), 2, "out of range");
  return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2));
}

void THTensor_(set4d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 4, 1, "tensor must have four dimensions");
  THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)) && (x3 >= 0) && (x3 < tensor->size(3)), 2, "out of range");
  THStorage_(set)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2)+x3*tensor->stride(3), value);
}

scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) == 4, 1, "tensor must have four dimensions");
  THArgCheck((x0 >= 0) && (x0 < tensor->size(0)) && (x1 >= 0) && (x1 < tensor->size(1)) && (x2 >= 0) && (x2 < tensor->size(2)) && (x3 >= 0) && (x3 < tensor->size(3)), 2, "out of range");
  return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2)+x3*tensor->stride(3));
}

THDescBuff THTensor_(desc)(const THTensor *tensor) {
  const int L = TH_DESC_BUFF_LEN;
  THDescBuff buf;
  char *str = buf.str;
  int n = 0;
#define _stringify(x) #x
  n += snprintf(str, L-n, "torch." _stringify(x) "Tensor of size ");
#undef _stringify
  int i;
  for(i = 0; i < THTensor_nDimension(tensor); i++) {
    if(n >= L) break;
    n += snprintf(str+n, L-n, "%" PRId64, tensor->size(i));
    if(i < THTensor_nDimension(tensor)-1) {
      n += snprintf(str+n, L-n, "x");
    }
  }
  if(n >= L) {
    snprintf(str+L-4, 4, "...");
  }
  return buf;
}

THDescBuff THTensor_(sizeDesc)(const THTensor *tensor) {
  THDescBuff buf = _THSizeDesc(tensor->sizes().data(), tensor->sizes().size());
  return buf;
}

#endif

@@ -55,24 +55,12 @@ TH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int
// This is especially likely to happen when the tensor is not contiguous. In general, if you still need the
// values, unless you are doing some size and stride tricks, do not use resize*.
TH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, const int64_t *size, const int64_t *stride);
TH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src);
TH_API void THTensor_(resize0d)(THTensor *tensor);
TH_API void THTensor_(resize1d)(THTensor *tensor, int64_t size0_);
TH_API void THTensor_(resize2d)(THTensor *tensor, int64_t size0_, int64_t size1_);
TH_API void THTensor_(resize3d)(THTensor *tensor, int64_t size0_, int64_t size1_, int64_t size2_);
TH_API void THTensor_(resize4d)(THTensor *tensor, int64_t size0_, int64_t size1_, int64_t size2_, int64_t size3_);
TH_API void THTensor_(resize5d)(THTensor *tensor, int64_t size0_, int64_t size1_, int64_t size2_, int64_t size3_, int64_t size4_);
// Note: these are legacy resize functions that treat sizes as size->size() == 0 and size->data<int64_t>() as being 0-terminated.

TH_API void THTensor_(set)(THTensor *self, THTensor *src);

TH_API void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension_, int64_t firstIndex_, int64_t size_);
TH_API void THTensor_(select)(THTensor *self, THTensor *src, int dimension_, int64_t sliceIndex_);
TH_API void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1_, int dimension2_);
TH_API int THTensor_(isTransposed)(const THTensor *self);

TH_API void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension_);
TH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_);

TH_API int THTensor_(isContiguous)(const THTensor *self);
TH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src);

@@ -80,23 +68,5 @@ TH_API ptrdiff_t THTensor_(nElement)(const THTensor *self);

TH_API void THTensor_(retain)(THTensor *self);
TH_API void THTensor_(free)(THTensor *self);
TH_API void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst);

/* Slow access methods [check everything] */
TH_API void THTensor_(set0d)(THTensor *tensor, scalar_t value);
TH_API void THTensor_(set1d)(THTensor *tensor, int64_t x0, scalar_t value);
TH_API void THTensor_(set2d)(THTensor *tensor, int64_t x0, int64_t x1, scalar_t value);
TH_API void THTensor_(set3d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, scalar_t value);
TH_API void THTensor_(set4d)(THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value);

TH_API scalar_t THTensor_(get0d)(const THTensor *tensor);
TH_API scalar_t THTensor_(get1d)(const THTensor *tensor, int64_t x0);
TH_API scalar_t THTensor_(get2d)(const THTensor *tensor, int64_t x0, int64_t x1);
TH_API scalar_t THTensor_(get3d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2);
TH_API scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3);

/* Debug methods */
TH_API THDescBuff THTensor_(desc)(const THTensor *tensor);
TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor);

#endif

@@ -11,6 +11,4 @@
TH_CPP_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
                                      at::IntArrayRef size_, at::IntArrayRef stride_);

TH_CPP_API void THTensor_(resize)(THTensor *self, at::IntArrayRef size, at::IntArrayRef stride);

#endif

@@ -1,369 +0,0 @@
#include <TH/THTensorApply.h>

#ifndef NAN
  #define NAN (nan(NULL))
#endif

#define HYPER_TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE / 16)
#define ORDIN_TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE / 4)
#define UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE / 2)
#define TH_OMP_OVERHEAD_THRESHOLD (at::internal::GRAIN_SIZE)

#define TH_CHECK_SAME_SIZE(TENSOR1, TENSOR2) \
{ \
  if (!THTensor_(isSameSizeAs)(TENSOR1, TENSOR2)) { \
    AT_ERROR("inconsistent tensor size, expected ", #TENSOR1, " ", TENSOR1->sizes(), " and ", #TENSOR2, " ", TENSOR2->sizes(), " to have the same size"); \
  } \
}

// Used for `scatter` and `scatterAdd`
// Assumes TENSOR1 is index
// TENSOR2 is real
// TENSOR3 is src
// Tests:
// 1. index->size(d) <= src->size(d) for all d
// 2. index->size(d) <= real->size(d) for all d != dim
#define TH_TENSOR_DIM_APPLY3_SIZE_SCATTER(TENSOR1, TENSOR2, TENSOR3, DIMENSION) \
{ \
  int shape_check_flag = 0; \
  for (TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < THTensor_nDimensionLegacyAll(TENSOR2); TH_TENSOR_DIM_APPLY_i++) \
  { \
    int64_t TENSOR1##_dim_size = THTensor_sizeLegacyNoScalars(TENSOR1, TH_TENSOR_DIM_APPLY_i); \
    if (TH_TENSOR_DIM_APPLY_i != DIMENSION) { \
      if (TENSOR1##_dim_size > THTensor_sizeLegacyNoScalars(TENSOR2, TH_TENSOR_DIM_APPLY_i)) { \
        shape_check_flag = 1; \
        break; \
      } \
    } \
    if (TENSOR1##_dim_size > THTensor_sizeLegacyNoScalars(TENSOR3, TH_TENSOR_DIM_APPLY_i)) { \
      shape_check_flag = 1; \
      break; \
    } \
  } \
  if (shape_check_flag == 1) { \
    AT_ERROR("Expected ", #TENSOR1, " ", TENSOR1->sizes(), " to be smaller size than ", #TENSOR3, " ", TENSOR3->sizes(), " and to be smaller than ", #TENSOR2, " ", TENSOR2->sizes(), " apart from dimension ", DIMENSION); \
  } \
}

#undef th_isnan
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#define th_isnan(val) \
(std::isnan(val))
#else
#define th_isnan(val) (0)
#endif

#undef th_isnan_break
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#define th_isnan_break(val) \
if (std::isnan(val)) break;
#else
#define th_isnan_break(val)
#endif

#if defined(__clang__)
#define PRAGMA(P) _Pragma(#P)
#define PRAGMA_IVDEP // Noop
#define PRAGMA_SIMD // Noop
#elif defined(_MSC_VER)
#define PRAGMA(P) __pragma(P)
# if _MSC_VER < 1920
// MSVC < 2019 doesn't support loop pragmas.
#  define PRAGMA_IVDEP // Noop
#  define PRAGMA_SIMD // Noop
# else
#  define PRAGMA_IVDEP PRAGMA(loop(ivdep))
#  define PRAGMA_SIMD PRAGMA(omp simd)
# endif
#else
#define PRAGMA(P) _Pragma(#P)
#define PRAGMA_IVDEP PRAGMA(ivdep)
#define PRAGMA_SIMD PRAGMA(simd)
#endif

#define TH_TENSOR_APPLY2_PARALLEL(SIZE, CONTIG1, CONTIG2, TYPE1, TENSOR1, TYPE2, TENSOR2, CODE, THRESHOLD) \
{ \
  /* for advanced searching index*/ \
  if (CONTIG1 && CONTIG2) { \
    TYPE1 *rp = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+TENSOR1->storage_offset(); \
    TYPE2 *tp = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+TENSOR2->storage_offset(); \
    if (tp != (TYPE2*)rp) { \
      at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
        PRAGMA_IVDEP \
        for (auto iter = begin; iter < end; iter++) { \
          TYPE2 *TENSOR2##_data = tp+iter; \
          TYPE1 *TENSOR1##_data = rp+iter; \
          CODE \
        } \
      }); \
    } else { \
      at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
        PRAGMA_SIMD \
        for (auto iter = begin; iter < end; iter++) { \
          TYPE2* TENSOR2##_data = tp+iter; \
          TYPE1* TENSOR1##_data = rp+iter; \
          CODE \
        } \
      }); \
    } \
  } else { \
    /* The following strategy is not easy to understand.
     * 1. Collapse the dimension of the tensors in order to decrease the number of nested loops.
     * 2. Calculate the numbers of elements allocated in each thread and the line index of the first one.
     * 3. Calculate the memory offset of the first element and the indexes in each dimension of the
     *    first one.
     * 4. iterate all elements in each thread. update the indexes in each dimension of the rest.
     */ \
    int TH_TENSOR_APPLY_hasFinished = 0; \
    int64_t TH_TENSOR_dim_index = 0; \
    /*step 1*/ \
    __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, -1, 1) \
    __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, -1, 1) \
    if (0 == TH_TENSOR_APPLY_hasFinished) { \
      auto TENSOR1##_i_local = TENSOR1##_i; \
      auto TENSOR2##_i_local = TENSOR2##_i; \
      auto TENSOR1##_data_local = TENSOR1##_data; \
      auto TENSOR2##_data_local = TENSOR2##_data; \
      at::parallel_for(0, SIZE, THRESHOLD, [&](int64_t begin, int64_t end) { \
        auto TENSOR1##_i = TENSOR1##_i_local; \
        auto TENSOR2##_i = TENSOR2##_i_local; \
        auto TENSOR1##_data = TENSOR1##_data_local; \
        auto TENSOR2##_data = TENSOR2##_data_local; \
        /*step 2*/ \
        ptrdiff_t line_index_start = begin; \
        ptrdiff_t line_seg_length = (end - begin); \
        /* step 3*/ \
        __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR2); \
        __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR1); \
        TENSOR2##_data += TENSOR2##_memory_offset; \
        TENSOR1##_data += TENSOR1##_memory_offset; \
        ptrdiff_t count = 0; \
        ptrdiff_t TENSOR2##_start = TENSOR2##_counter_tmp[TENSOR2##_dim-1]; \
        ptrdiff_t TENSOR1##_start = TENSOR1##_counter_tmp[TENSOR1##_dim-1]; \
        /* step 4*/ \
        while (count < line_seg_length) { \
          for (TENSOR2##_i=TENSOR2##_start, TENSOR1##_i = TENSOR1##_start; ((count < line_seg_length) && (TENSOR2##_i < TENSOR2##_size) && (TENSOR1##_i < TENSOR1##_size)); ++TENSOR2##_i, ++TENSOR1##_i, ++count) { \
            CODE \
            TENSOR2##_data += TENSOR2##_stride; \
            TENSOR1##_data += TENSOR1##_stride; \
          } \
          if (count < line_seg_length) { \
            __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR2); \
            __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR1); \
          } \
        } \
        if (TENSOR1##_counter_tmp != NULL) { \
          THFree(TENSOR1##_counter_tmp); \
        } \
        if (TENSOR2##_counter_tmp != NULL) { \
          THFree(TENSOR2##_counter_tmp); \
        } \
      }); \
    } \
    if (TENSOR2##_counter != NULL) { \
      THFree(TENSOR2##_counter); \
    } \
    if (TENSOR1##_counter != NULL) { \
      THFree(TENSOR1##_counter); \
    } \
  } \
}
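
An editorial sketch (not part of the TH sources): the contiguous fast path above amounts to splitting a flat range with at::parallel_for and running the element-wise CODE on each chunk; all names below are hypothetical.

#include <ATen/Parallel.h>
#include <cstdint>

template <typename scalar_t, typename Op>
void apply2_contig_sketch(scalar_t* r, const scalar_t* t, int64_t n,
                          int64_t grain_size, Op op) {
  at::parallel_for(0, n, grain_size, [&](int64_t begin, int64_t end) {
    for (int64_t i = begin; i < end; ++i) {
      r[i] = op(t[i]);  // plays the role of CODE acting on TENSOR1/TENSOR2 data
    }
  });
}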

#define TH_TENSOR_APPLY3_PARALLEL(SIZE, CONTIG1, CONTIG2, CONTIG3, TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE, THRESHOLD) \
{ \
  /* for advanced searching index */ \
  if (CONTIG1 && CONTIG2 && CONTIG3) { \
    TYPE1 *rp = THTensor_getStoragePtr(TENSOR1)->data<TYPE1>()+TENSOR1->storage_offset(); \
    TYPE2 *tp = THTensor_getStoragePtr(TENSOR2)->data<TYPE2>()+TENSOR2->storage_offset(); \
    TYPE3 *srcp = THTensor_getStoragePtr(TENSOR3)->data<TYPE3>()+TENSOR3->storage_offset(); \
    if (tp != (TYPE2*)rp) { \
      at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
        PRAGMA_IVDEP \
        for (auto iter = begin; iter < end; iter++) { \
          TYPE1 *TENSOR1##_data = rp+iter; \
          TYPE2 *TENSOR2##_data = tp+iter; \
          TYPE3 *TENSOR3##_data = srcp+iter; \
          CODE \
        } \
      }); \
    } else { \
      at::parallel_for(0, SIZE, (THRESHOLD * 10), [&](int64_t begin, int64_t end) { \
        PRAGMA_SIMD \
        for (auto iter = begin; iter < end; iter++) { \
          TYPE1 *TENSOR1##_data = rp+iter; \
          TYPE2 *TENSOR2##_data = tp+iter; \
          TYPE3 *TENSOR3##_data = srcp+iter; \
          CODE \
        } \
      }); \
    } \
  } else { \
    int TH_TENSOR_APPLY_hasFinished = 0; \
    int64_t TH_TENSOR_dim_index = 0; \
    __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, -1, 1) \
    __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, -1, 1) \
    __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, -1, 1) \
    if (0 == TH_TENSOR_APPLY_hasFinished) { \
      auto TENSOR1##_i_local = TENSOR1##_i; \
      auto TENSOR2##_i_local = TENSOR2##_i; \
      auto TENSOR3##_i_local = TENSOR3##_i; \
      auto TENSOR1##_data_local = TENSOR1##_data; \
      auto TENSOR2##_data_local = TENSOR2##_data; \
      auto TENSOR3##_data_local = TENSOR3##_data; \
      at::parallel_for(0, SIZE, THRESHOLD, [&](int64_t begin, int64_t end) { \
        auto TENSOR1##_i = TENSOR1##_i_local; \
        auto TENSOR2##_i = TENSOR2##_i_local; \
        auto TENSOR3##_i = TENSOR3##_i_local; \
        auto TENSOR1##_data = TENSOR1##_data_local; \
        auto TENSOR2##_data = TENSOR2##_data_local; \
        auto TENSOR3##_data = TENSOR3##_data_local; \
        ptrdiff_t line_index_start = begin; \
        ptrdiff_t line_seg_length = (end - begin); \
        __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR1); \
        __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR2); \
        __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR3); \
        TENSOR1##_data += TENSOR1##_memory_offset; \
        TENSOR2##_data += TENSOR2##_memory_offset; \
        TENSOR3##_data += TENSOR3##_memory_offset; \
        ptrdiff_t count = 0; \
        ptrdiff_t TENSOR1##_start = TENSOR1##_counter_tmp[TENSOR1##_dim - 1]; \
        ptrdiff_t TENSOR2##_start = TENSOR2##_counter_tmp[TENSOR2##_dim - 1]; \
        ptrdiff_t TENSOR3##_start = TENSOR3##_counter_tmp[TENSOR3##_dim - 1]; \
        while (count < line_seg_length) { \
          for (TENSOR1##_i=TENSOR1##_start, TENSOR2##_i=TENSOR2##_start,TENSOR3##_i=TENSOR3##_start; (count<line_seg_length)&&(TENSOR1##_i<TENSOR1##_size)&&(TENSOR2##_i<TENSOR2##_size)&&(TENSOR3##_i<TENSOR3##_size); ++TENSOR1##_i,++TENSOR2##_i,++TENSOR3##_i,++count) { \
            CODE \
            TENSOR1##_data += TENSOR1##_stride; \
            TENSOR2##_data += TENSOR2##_stride; \
            TENSOR3##_data += TENSOR3##_stride; \
          } \
          if (count < line_seg_length) { \
            __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR1); \
            __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR2); \
            __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR3); \
          } \
        } \
        if (TENSOR1##_counter_tmp != NULL) { \
          THFree(TENSOR1##_counter_tmp); \
        } \
        if (TENSOR2##_counter_tmp != NULL) { \
          THFree(TENSOR2##_counter_tmp); \
        } \
        if (TENSOR3##_counter_tmp != NULL) { \
          THFree(TENSOR3##_counter_tmp); \
        } \
      }); \
    } \
    if (TENSOR1##_counter != NULL) { \
      THFree(TENSOR1##_counter); \
    } \
    if (TENSOR2##_counter != NULL) { \
      THFree(TENSOR2##_counter); \
    } \
    if (TENSOR3##_counter != NULL) { \
      THFree(TENSOR3##_counter); \
    } \
  } \
}

#define TH_TENSOR_APPLY_REDUCTION_SUM_PARALLEL(TYPE, TENSOR, EXPR, OUTPUT, THRESHOLD) \
{ \
  int TENSOR##Contig = THTensor_(isContiguous)(TENSOR); \
  ptrdiff_t TENSOR##Size = THTensor_(nElement)(TENSOR); \
  if (TENSOR##Contig) { \
    TYPE *rp = THTensor_getStoragePtr(TENSOR)->data<TYPE>()+TENSOR->storage_offset(); \
    OUTPUT = at::parallel_reduce(0, TENSOR##Size, (THRESHOLD * 10), (accreal)0, [&](int64_t begin, int64_t end, accreal ident)->accreal { \
      accreal r = ident; \
      for (auto iter = begin; iter < end; iter++) { \
        TYPE *TENSOR##_data = rp+iter; \
        r += (EXPR); \
      } \
      return r; \
    }, std::plus<accreal>()); \
  } else { \
    int TH_TENSOR_APPLY_hasFinished = 0; \
    int64_t TH_TENSOR_dim_index = 0; \
    __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, -1, 1); \
    if (0 == TH_TENSOR_APPLY_hasFinished) { \
      auto TENSOR##_data_local = TENSOR##_data; \
      auto TENSOR##_i_local = TENSOR##_i; \
      OUTPUT = at::parallel_reduce(0, TENSOR##Size, THRESHOLD, (accreal)0, [&](int64_t begin, int64_t end, accreal ident)->accreal { \
        auto TENSOR##_data = TENSOR##_data_local; \
        auto TENSOR##_i = TENSOR##_i_local; \
        ptrdiff_t line_index_start = begin; \
        ptrdiff_t line_seg_length = (end - begin); \
        __TH_TENSOR_APPLYX_CAL_MEMORY_OFFSET(TENSOR); \
        TENSOR##_data += TENSOR##_memory_offset; \
        ptrdiff_t count = 0; \
        ptrdiff_t TENSOR##_start = TENSOR##_counter_tmp[TENSOR##_dim - 1]; \
        accreal r = ident; \
        while (count < line_seg_length) { \
          for (TENSOR##_i=TENSOR##_start; (count < line_seg_length)&&(TENSOR##_i < TENSOR##_size); ++TENSOR##_i, ++count) { \
            r += (EXPR); \
            TENSOR##_data += TENSOR##_stride; \
          } \
          if (count < line_seg_length) { \
            __TH_TENSOR_APPLYX_UPDATE_COUNTERS_PARALLEL(TENSOR); \
          } \
        } \
        if (TENSOR##_counter_tmp != NULL) { \
          THFree(TENSOR##_counter_tmp); \
        } \
        return r; \
      }, std::plus<accreal>()); \
    } \
    if (TENSOR##_counter != NULL) { \
      THFree(TENSOR##_counter); \
    } \
  } \
}

#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \
{ \
  auto code_fn = [&](int64_t begin, int64_t end) { \
    ptrdiff_t TENSOR##_len = end - begin; \
    TYPE *TENSOR##_data = TENSOR->data<scalar_t>() + begin; \
    CODE \
  }; \
  int in_parallel = at::in_parallel_region(); \
  ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR); \
  if (!in_parallel) { \
    at::parallel_for(0, TH_TENSOR_size, TH_OMP_OVERHEAD_THRESHOLD, code_fn); \
  } else { \
    code_fn(0, TH_TENSOR_size); \
  } \
}

#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
{ \
  auto code_fn = [&](int64_t begin, int64_t end) { \
    ptrdiff_t TENSOR1##_len = end - begin; \
    TYPE1 *TENSOR1##_data = TENSOR1->data<scalar_t>() + begin; \
    TYPE2 *TENSOR2##_data = TENSOR2->data<scalar_t>() + begin; \
    CODE \
  }; \
  int in_parallel = at::in_parallel_region(); \
  ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \
  if (!in_parallel) { \
    at::parallel_for(0, TH_TENSOR_size, TH_OMP_OVERHEAD_THRESHOLD, code_fn); \
  } else { \
    code_fn(0, TH_TENSOR_size); \
  } \
}

#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
{ \
  auto code_fn = [&](int64_t begin, int64_t end) { \
    ptrdiff_t TENSOR1##_len = end - begin; \
    TYPE1 *TENSOR1##_data = TENSOR1->data<scalar_t>() + begin; \
    TYPE2 *TENSOR2##_data = TENSOR2->data<scalar_t>() + begin; \
    TYPE3 *TENSOR3##_data = TENSOR3->data<scalar_t>() + begin; \
    CODE \
  }; \
  int in_parallel = at::in_parallel_region(); \
  ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \
  if (!in_parallel) { \
    at::parallel_for(0, TH_TENSOR_size, TH_OMP_OVERHEAD_THRESHOLD, code_fn); \
  } else { \
    code_fn(0, TH_TENSOR_size); \
  } \
}

@@ -1,49 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorFastGetSet.hpp"
#else

static inline scalar_t THTensor_(fastGetLegacy1dNoScalars)(THTensor *self, int64_t x0) {
  return self->unsafe_data<scalar_t>()[x0*THTensor_strideLegacyNoScalars(self, 0)];
}

static inline scalar_t THTensor_(fastGet1d)(THTensor *self, int64_t x0) {
  return self->unsafe_data<scalar_t>()[x0*self->stride(0)];
}

static inline scalar_t THTensor_(fastGet2d)(THTensor *self, int64_t x0, int64_t x1) {
  return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)];
}

static inline scalar_t THTensor_(fastGet3d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2) {
  return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)];
}

static inline scalar_t THTensor_(fastGet4d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3) {
  return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)];
}

static inline scalar_t THTensor_(fastGet5d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, int64_t x4) {
  return self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)+(x4)*self->stride(4)];
}

static inline void THTensor_(fastSet1d)(THTensor *self, int64_t x0, scalar_t value) {
  self->unsafe_data<scalar_t>()[x0*self->stride(0)] = value;
}

static inline void THTensor_(fastSet2d)(THTensor *self, int64_t x0, int64_t x1, scalar_t value) {
  self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)] = value;
}

static inline void THTensor_(fastSet3d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, scalar_t value) {
  self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)] = value;
}

static inline void THTensor_(fastSet4d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, scalar_t value) {
  self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)] = value;
}

static inline void THTensor_(fastSet5d)(THTensor *self, int64_t x0, int64_t x1, int64_t x2, int64_t x3, int64_t x4, scalar_t value) {
  self->unsafe_data<scalar_t>()[x0*self->stride(0)+x1*self->stride(1)+x2*self->stride(2)+x3*self->stride(3)+(x4)*self->stride(4)] = value;
}

#endif

@@ -1,32 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorMath.h"
#else

#include <ATen/core/Generator.h>

TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb);

#if !defined(TH_REAL_IS_HALF)

TH_API ptrdiff_t THTensor_(numel)(THTensor *t);

#if !defined(TH_REAL_IS_BFLOAT16)

void THTensor_(preserveReduceDimSemantics)(THTensor *r_, int in_dims, int reduce_dimension, int keepdim);

TH_API void THTensor_(take)(THTensor *tensor, THTensor *src, THLongTensor *index);
TH_API void THTensor_(put)(THTensor *tensor, THLongTensor *index, THTensor *src, int accumulate);

#if !defined(TH_REAL_IS_BOOL) /* non bool only part */

TH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, int64_t k, int dimension, int keepdim);

#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)

TH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue);

#endif
#endif
#endif
#endif /* !defined(TH_REAL_IS_HALF) */
#endif /* TH_GENERIC_FILE*/

@@ -1,292 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorMoreMath.cpp"
#else

#include <TH/generic/THTensorApply.hpp>
#include <ATen/CPUGeneratorImpl.h>
#include <ATen/Utils.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/WrapDimUtils.h>
#include <limits>

ptrdiff_t THTensor_(numel)(THTensor *t)
{
  return THTensor_(nElement)(t);
}

#if !defined(TH_REAL_IS_BFLOAT16) && !defined(TH_REAL_IS_HALF)

// Helper function to be used in a reduction operation.
// Due to resize semantics of outputs, if the specified output tensor r_ has
// same size as the output of the reduction operation, then any noncontiguities
// in r_ should be preserved.
// The reduction operation, however, needs to act on r_ with an extra dimension
// (the reduced dimension), so this function "resizes" r_ and preserves its
// noncontiguities if necessary.
void THTensor_(preserveReduceDimSemantics)(
    THTensor *r_, int in_dims, int reduce_dimension, int keepdim) {
  if (r_ && !keepdim &&
      THTensor_(nDimensionLegacyAll)(r_) == in_dims - 1 &&
      THTensor_(nDimensionLegacyAll)(r_) != 0) {
    THTensor_(unsqueeze1d)(r_, r_, reduce_dimension);
  }
}
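
Editorial illustration (shapes are hypothetical): reducing a {2,3} input over dimension 1 without keepdim yields a {2} result, so a caller-supplied r_ of size {2} is unsqueezed here back to {2,1}. The reduction kernel can then index r_ with the reduced dimension present while keeping whatever strides r_ already had, and the caller squeezes that dimension away again afterwards, as kthvalue below does.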

#if !defined(TH_REAL_IS_BOOL) /* non bool only part */

#define ARR(III) arr[(III)*stride]
#define IDX(III) idx[(III)*stride]

#define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap
#define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap

#define ARR_SWAP(III, JJJ) \
  REAL_SWAP(ARR(III), ARR(JJJ));

#define BOTH_SWAP(III, JJJ) \
  REAL_SWAP(ARR(III), ARR(JJJ)); \
  LONG_SWAP(IDX(III), IDX(JJJ))

/* Emulate NumPy behavior of putting NaNs
 * at the end of an ascending list. */
#define GT_OR_NAN(x, y) \
  ((th_isnan(x) && !(th_isnan(y))) || (x > y))

/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's
   public domain implementation at http://ndevilla.free.fr/median/median/
   Adapted similarly to the above Quicksort algorithm. */
static void THTensor_(quickselect)(scalar_t *arr, int64_t *idx, int64_t k, int64_t elements, int64_t stride)
{
  int64_t P, L, R, i, j, swap;
  scalar_t rswap, piv;
  L = 0;
  R = elements-1;

  do {
    if (R <= L) /* One element only */
      return;

    if (R == L+1) { /* Two elements only */
      if (ARR(L) > ARR(R)) {
        BOTH_SWAP(L, R);
      }
      return;
    }

    /* Use median of three for pivot choice */
    P=(L+R)>>1;
    BOTH_SWAP(P, L+1);
    if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }
    if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }
    if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }

    i = L+1;
    j = R;
    piv = ARR(L);
    do {
      do i++; while(ARR(i) < piv);
      do j--; while(ARR(j) > piv);
      if (j < i)
        break;
      BOTH_SWAP(i, j);
    } while(1);
    BOTH_SWAP(L, j);

    /* Re-set active partition */
    if (j <= k) L=i;
    if (j >= k) R=j-1;
  } while(1);
}
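
Editorial note: for a contiguous slice the same selection can be written with the standard library; this hypothetical helper is shown only to clarify what quickselect computes (NaN ordering aside).

#include <algorithm>
#include <cstdint>
#include <vector>

// Places the k-th smallest value (0-based) at position k, like the partition above.
inline double kth_smallest(std::vector<double> v, int64_t k) {
  std::nth_element(v.begin(), v.begin() + k, v.end());
  return v[static_cast<size_t>(k)];
}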

#undef ARR
#undef IDX
#undef LONG_SWAP
#undef REAL_SWAP
#undef BOTH_SWAP

void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, int64_t k, int dimension, int keepdim)
{
  THTensor *temp_;
  THLongTensor *tempi_;
  scalar_t *temp__data;
  int64_t *tempi__data;
  int64_t t_size_dim;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "dimension out of range");
  THArgCheck(k > 0 && k <= THTensor_sizeLegacyNoScalars(t, dimension), 2, "selected index out of range");

  int in_dims = THTensor_(nDimensionLegacyAll)(t);
  THTensor_(preserveReduceDimSemantics)(values_, in_dims, dimension, keepdim);
  THLongTensor_preserveReduceDimSemantics(indices_, in_dims, dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(values_, dim, {});
  THLongTensor_resize(indices_, dim, {});

  t_size_dim = THTensor_sizeLegacyNoScalars(t, dimension);

  temp_ = THTensor_(new)();
  THTensor_(resize1d)(temp_, t_size_dim);
  temp__data = temp_->data<scalar_t>();

  tempi_ = THLongTensor_new();
  THLongTensor_resize1d(tempi_, t_size_dim);
  tempi__data = THLongTensor_data(tempi_);

  TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, values_, int64_t, indices_, dimension,
                       TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                       int64_t i;
                       for(i = 0; i < t_size_dim; i++)
                         temp__data[i] = t_data[i*t_stride];
                       for(i = 0; i < t_size_dim; i++)
                         tempi__data[i] = i;
                       THTensor_(quickselect)(temp__data, tempi__data, k - 1, t_size_dim, 1);
                       *values__data = temp__data[k-1];
                       *indices__data = tempi__data[k-1];);

  c10::raw::intrusive_ptr::decref(temp_);
  THLongTensor_free(tempi_);
  if (!keepdim) {
    THTensor_(squeeze1d)(values_, values_, dimension);
    THLongTensor_squeeze1d(indices_, indices_, dimension);
  }
}

static void THTensor_(propagate_names_if_named_tensor_enabled)(THTensor* result, THTensor* src) {
  at::namedinference::propagate_names(result, src);
}

#define LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, THRESHOLD) \
  void THTensor_(NAME)(THTensor *r_, THTensor *t) \
  { \
    THTensor_(resizeAs)(r_, t); \
    ptrdiff_t r_Size = THTensor_(nElement)(r_); \
    int r_Contig = THTensor_(isContiguous)(r_); \
    int tContig = THTensor_(isContiguous)(t); \
    TH_TENSOR_APPLY2_PARALLEL(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);, THRESHOLD); \
    THTensor_(propagate_names_if_named_tensor_enabled)(r_, t); \
  }

#define LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS(NAME, CFUNC) \
  LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, THRESHOLD) \
  void THTensor_(NAME)(THTensor *r_, THTensor *t) \
  { \
    THTensor_(resizeAs)(r_, t); \
    ptrdiff_t r_Size = THTensor_(nElement)(r_); \
    int r_Contig = THTensor_(isContiguous)(r_); \
    int tContig = THTensor_(isContiguous)(t); \
    if (r_Contig && tContig) { \
      TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(NAME)(r__data, t_data, r__len);); \
    } else { \
      TH_TENSOR_APPLY2_PARALLEL(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);, THRESHOLD); \
    } \
    THTensor_(propagate_names_if_named_tensor_enabled)(r_, t); \
  }

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS(NAME, CFUNC) \
  LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)

#define EXPAND(...) __VA_ARGS__

#define GET_4TH_ARG(ARG0, ARG1, ARG2, ARG3, ...) ARG3

#define LAB_IMPLEMENT_BASIC_FUNCTION_CHOOSE(...) \
  EXPAND(GET_4TH_ARG(__VA_ARGS__, LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS, LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS, ))

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_CHOOSE(...) \
  EXPAND(GET_4TH_ARG(__VA_ARGS__, LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS, LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS, ))

#define LAB_IMPLEMENT_BASIC_FUNCTION(...) EXPAND(LAB_IMPLEMENT_BASIC_FUNCTION_CHOOSE(__VA_ARGS__)(__VA_ARGS__))

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION(...) EXPAND(LAB_IMPLEMENT_VECTORIZED_FUNCTION_CHOOSE(__VA_ARGS__)(__VA_ARGS__))

/*
 * LAB_IMPLEMENT_BASIC_FUNCTION is a macro with optional parameters, so it can be used flexibly.
 * The macro will discard the invalid threshold if parallelization is unavailable.
 * The macro will supply a default threshold if none is passed.
 * In other words:
 * (A) If parallelization is unavailable, both usages below are valid.
 *     (1) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity, OMP_OVERHEAD_THRESHOLD) // discards the unused threshold
 *     (2) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity)
 * (B) If parallelization is available, both usages below are also valid.
 *     (1) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity, OMP_OVERHEAD_THRESHOLD)
 *     (2) LAB_IMPLEMENT_BASIC_FUNCTION(type_func, func_entity) // uses the default threshold
 * The same applies to LAB_IMPLEMENT_VECTORIZED_FUNCTION.
 */
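
Editorial note on the dispatch above: GET_4TH_ARG always yields its fourth argument, so the number of user-supplied arguments decides which implementation macro lands in that slot.

// Hypothetical expansion trace (names are placeholders, not real TH functions):
//   LAB_IMPLEMENT_BASIC_FUNCTION(foo, myfunc)
//     -> GET_4TH_ARG(foo, myfunc, ..._3_ARGS, ..._2_ARGS, ) -> LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS
//        (which forwards with the default UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)
//   LAB_IMPLEMENT_BASIC_FUNCTION(foo, myfunc, SOME_THRESHOLD)
//     -> GET_4TH_ARG(foo, myfunc, SOME_THRESHOLD, ..._3_ARGS, ..._2_ARGS, ) -> LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS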

LAB_IMPLEMENT_BASIC_FUNCTION(neg,-)

#if defined(TH_REAL_IS_LONG)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,std::abs)
#endif /* int64_t only part */

#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_CHAR)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)
#endif /* int only part */

#if defined(TH_REAL_IS_BYTE)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,)
#endif /* for byte, identity due to it being unsigned */

/* floating point only now */
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)

#if defined (TH_REAL_IS_FLOAT)
#define TH_MATH_NAME(fn) fn##f
#else
#define TH_MATH_NAME(fn) fn
#endif

LAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs))

LAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
LAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)

void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue)
{
  if (nbins <= 0) {
    THError("bins must be > 0");
  }
  scalar_t minval;
  scalar_t maxval;
  scalar_t *h_data;

  THTensor_(resize1d)(hist, nbins);
  THTensor_wrap(hist).zero_();
  minval = minvalue;
  maxval = maxvalue;
  if (minval == maxval)
  {
    minval = THTensor_wrap(tensor).min().item<scalar_t>();
    maxval = THTensor_wrap(tensor).max().item<scalar_t>();
  }
  if (minval == maxval)
  {
    minval = minval - 1;
    maxval = maxval + 1;
  }

  TORCH_CHECK(!(std::isinf(minval) || std::isinf(maxval) || std::isnan(minval) || std::isnan(maxval)), "range of [", minval, ", ", maxval, "] is not finite");
  TORCH_CHECK(minval < maxval, "max must be larger than min");

  h_data = hist->data<scalar_t>();

  TH_TENSOR_APPLY(scalar_t, tensor,
    if (*tensor_data >= minval && *tensor_data <= maxval) {
      const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins);
      h_data[THMin(bin, nbins-1)] += 1;
    }
  );
}
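
A standalone sketch of the same binning rule (hypothetical, not part of the commit): values in [minval, maxval] land in bin floor((x - minval) / (maxval - minval) * nbins), clamped so that x == maxval falls into the last bin.

#include <algorithm>
#include <cstdint>
#include <vector>

inline std::vector<int64_t> histc_sketch(const std::vector<double>& xs, int64_t nbins,
                                         double minval, double maxval) {
  std::vector<int64_t> hist(static_cast<size_t>(nbins), 0);
  for (double x : xs) {
    if (x >= minval && x <= maxval) {
      int64_t bin = static_cast<int64_t>((x - minval) / (maxval - minval) * nbins);
      hist[static_cast<size_t>(std::min(bin, nbins - 1))] += 1;  // clamp the top edge
    }
  }
  return hist;
}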

#endif

#undef TH_MATH_NAME
#endif /* floating point only part */
#undef IS_NONZERO

#endif /* !defined(TH_REAL_IS_BOOL) */

#endif /* TH_GENERIC_FILE */

@@ -1,18 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THVector.h"
#else
#if !defined(TH_REAL_IS_BOOL) /* non bool only part */

TH_API void THVector_(neg)(scalar_t *y, const scalar_t *x, const ptrdiff_t n);

#endif /* non bool only part */

/* floating point only now */
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)

TH_API void THVector_(erfc)(scalar_t *y, const scalar_t *x, const ptrdiff_t n);
TH_API void THVector_(pow)(scalar_t *y, const scalar_t *x, const scalar_t c, const ptrdiff_t n);

#endif /* floating point only part */

#endif

@@ -1078,11 +1078,9 @@ aten_native_source_non_codegen_list = [
    "aten/src/ATen/native/sparse/SparseCsrTensor.cpp",
    "aten/src/ATen/native/sparse/SparseTensorMath.cpp",
    "aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp",
    "aten/src/TH/THBlas.cpp",
    "aten/src/TH/THGeneral.cpp",
    "aten/src/TH/THStorageFunctions.cpp",
    "aten/src/TH/THTensor.cpp",
    "aten/src/TH/THTensorMoreMath.cpp",
    "aten/src/ATen/native/utils/Factory.cpp",
    "aten/src/ATen/native/xnnpack/Activation.cpp",
    "aten/src/ATen/native/xnnpack/ChannelShuffle.cpp",