improve docker packages, fix bugs, enable tests, enable FFT (#10893)

Summary:
* improve Docker packages (install OpenBLAS so LAPACK functionality is available at compile time, with optimizations for both Intel and AMD CPUs)
* integrate rocFFT, i.e., enable the Fourier-transform functionality on ROCm (a usage sketch follows this summary)
* fix bugs in the ROCm build caused by assuming a warp size of 32 (AMD wavefronts are 64 lanes wide)
* enable more test sets and skip the tests that don't work on ROCm yet
* no longer disable asserts during hipification
* assorted small improvements
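
Editor's aside, as a hedged usage sketch (not part of this commit's diff): with rocFFT wired in, the existing spectral ops are expected to run on a ROCm build the same way they do on CUDA. The tensor sizes below are arbitrary, the snippet assumes a build where torch.cuda.is_available() is true on the HIP device, and torch.rfft/torch.irfft are the era-appropriate entry points.

# Hedged usage sketch: exercises the cuFFT/hipFFT plan path touched by this PR.
# Assumes a CUDA or ROCm build with a visible GPU; sizes are illustrative only.
import torch

x = torch.randn(4, 64, 64, device='cuda')               # batch of 2-D real signals
spec = torch.rfft(x, 2)                                  # real-to-complex transform (R2C / D2Z)
recon = torch.irfft(spec, 2, signal_sizes=x.shape[1:])   # complex-to-real round trip (C2R / Z2D)
print((x - recon).abs().max())                           # round-trip error should be tiny
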
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10893

Differential Revision: D9615053

Pulled By: ezyang

fbshipit-source-id: 864b4d27bf089421f7dfd8065e5017f9ea2f7b3b
Authored by iotamudelta on 2018-09-02 08:42:51 -07:00, committed by Facebook GitHub Bot
Parent: abe8b3391d
Commit: 33c7cc13ca
23 changed files with 794 additions and 238 deletions

View File

@@ -90,8 +90,13 @@ public:
                IntList output_sizes) {
     // signal sizes
+#ifdef __HIP_PLATFORM_HCC__
+    std::vector<int> signal_sizes(checked_signal_sizes.begin(),
+                                  checked_signal_sizes.end());
+#else
     std::vector<long long int> signal_sizes(checked_signal_sizes.begin(),
                                             checked_signal_sizes.end());
+#endif
 
     // input batch size
     long long int batch = input.size(0);
@@ -149,7 +154,11 @@ public:
     // TODO: Figure out why windows fails to compile
     //   at::optional<std::vector<long long int>> inembed_opt = at::nullopt;
     //   Then move the following to a helper function.
+#ifdef __HIP_PLATFORM_HCC__
+    std::vector<int> inembed(signal_ndim);
+#else
     std::vector<long long int> inembed(signal_ndim);
+#endif
     if (!clone_input) {
       auto istrides = input.strides();
       auto last_istride = istrides[signal_ndim];
@@ -192,6 +201,37 @@ public:
                 inembed.begin());  // begin of output
     }
 
+#ifdef __HIP_PLATFORM_HCC__
+    hipfftType exec_type;
+    if (input.type().scalarType() == ScalarType::Float) {
+      if (complex_input && complex_output) {
+        exec_type = HIPFFT_C2C;
+      } else if (complex_input && !complex_output) {
+        exec_type = HIPFFT_C2R;
+      } else if (!complex_input && complex_output) {
+        exec_type = HIPFFT_R2C;
+      } else {
+        throw std::runtime_error("hipFFT doesn't support r2r (float)");
+      }
+    } else if (input.type().scalarType() == ScalarType::Double) {
+      if (complex_input && complex_output) {
+        exec_type = HIPFFT_Z2Z;
+      } else if (complex_input && !complex_output) {
+        exec_type = HIPFFT_Z2D;
+      } else if (!complex_input && complex_output) {
+        exec_type = HIPFFT_D2Z;
+      } else {
+        throw std::runtime_error("hipFFT doesn't support r2r (double)");
+      }
+    } else {
+      std::ostringstream ss;
+      ss << "hipFFT doesn't support tensor of type: "
+         << at::toString(input.type().scalarType());
+      throw std::runtime_error(ss.str());
+    }
+#else
     cudaDataType itype, otype, exec_type;
     if (input.type().scalarType() == ScalarType::Float) {
       itype = complex_input ? CUDA_C_32F : CUDA_R_32F;
@@ -211,6 +251,7 @@ public:
          << at::toString(input.type().scalarType());
       throw std::runtime_error(ss.str());
     }
+#endif
 
     // create plan
     auto raw_plan_ptr = new cufftHandle();
@@ -229,10 +270,17 @@ public:
       // by assuming base_istride = base_ostride = 1.
       //
       // See NOTE [ cuFFT Embedded Strides ] in native/cuda/SpectralOps.cu.
+#ifdef __HIP_PLATFORM_HCC__
+      CUFFT_CHECK(hipfftMakePlanMany(plan(), signal_ndim, signal_sizes.data(),
+        /* inembed */ nullptr, /* base_istride */ 1, /* idist */ 1,
+        /* onembed */ nullptr, /* base_ostride */ 1, /* odist */ 1,
+        exec_type, batch, &ws_size_t));
+#else
       CUFFT_CHECK(cufftXtMakePlanMany(plan(), signal_ndim, signal_sizes.data(),
        /* inembed */ nullptr, /* base_istride */ 1, /* idist */ 1, itype,
        /* onembed */ nullptr, /* base_ostride */ 1, /* odist */ 1, otype,
        batch, &ws_size_t, exec_type));
+#endif
     } else {
       // set idist (stride at batch dim)
       // set base_istride (stride at innermost dim of signal)
@@ -254,6 +302,18 @@ public:
       }
 
       // set odist, onembed, base_ostride
+#ifdef __HIP_PLATFORM_HCC__
+      int odist = at::prod_intlist(output_sizes.slice(1, signal_ndim));
+      std::vector<int> onembed(output_sizes.data() + 1, output_sizes.data() + signal_ndim + 1);
+      int base_ostride = 1;
+
+      int istride = base_istride;
+      int iidist = idist;
+      CUFFT_CHECK(hipfftMakePlanMany(plan(), signal_ndim, signal_sizes.data(),
+        inembed.data(), istride, iidist,
+        onembed.data(), base_ostride, odist,
+        exec_type, batch, &ws_size_t));
+#else
       long long int odist = at::prod_intlist(output_sizes.slice(1, signal_ndim));
       std::vector<long long int> onembed(output_sizes.data() + 1, output_sizes.data() + signal_ndim + 1);
       long long int base_ostride = 1;
@@ -262,11 +322,16 @@ public:
        inembed.data(), base_istride, idist, itype,
        onembed.data(), base_ostride, odist, otype,
        batch, &ws_size_t, exec_type));
+#endif
     }
     ws_size = static_cast<int64_t>(ws_size_t);
   }
 
+#ifdef __HIP_PLATFORM_HCC__
+  cufftHandle &plan() const { return *plan_ptr.get(); }
+#else
   const cufftHandle &plan() const { return *plan_ptr.get(); }
+#endif
 
   bool should_clone_input() const { return clone_input; }

View File

@@ -49,8 +49,10 @@ static inline std::string _cudaGetErrorEnum(cufftResult error)
       return "CUFFT_NO_WORKSPACE";
     case CUFFT_NOT_IMPLEMENTED:
       return "CUFFT_NOT_IMPLEMENTED";
+#ifndef __HIP_PLATFORM_HCC__
     case CUFFT_LICENSE_ERROR:
       return "CUFFT_LICENSE_ERROR";
+#endif
     case CUFFT_NOT_SUPPORTED:
       return "CUFFT_NOT_SUPPORTED";
     default:

View File

@@ -189,8 +189,45 @@ static inline Tensor _run_cufft(
   CUFFT_CHECK(cufftSetWorkArea(plan, ws.data_ptr()));
 
   // run
+#ifdef __HIP_PLATFORM_HCC__
+  if (input.type().scalarType() == ScalarType::Float) {
+    if (complex_input && complex_output) {
+      CUFFT_CHECK(hipfftExecC2C(plan, static_cast<hipfftComplex*>(input.data_ptr()),
+        static_cast<hipfftComplex*>(output.data_ptr()),
+        inverse ? HIPFFT_BACKWARD : HIPFFT_FORWARD));
+    } else if (complex_input && !complex_output) {
+      CUFFT_CHECK(hipfftExecC2R(plan, static_cast<hipfftComplex*>(input.data_ptr()),
+        static_cast<hipfftReal*>(output.data_ptr())));
+    } else if (!complex_input && complex_output) {
+      CUFFT_CHECK(hipfftExecR2C(plan, static_cast<hipfftReal*>(input.data_ptr()),
+        static_cast<hipfftComplex*>(output.data_ptr())));
+    } else {
+      throw std::runtime_error("hipFFT doesn't support r2r (float)");
+    }
+  } else if (input.type().scalarType() == ScalarType::Double) {
+    if (complex_input && complex_output) {
+      CUFFT_CHECK(hipfftExecZ2Z(plan, static_cast<hipfftDoubleComplex*>(input.data_ptr()),
+        static_cast<hipfftDoubleComplex*>(output.data_ptr()),
+        inverse ? HIPFFT_BACKWARD : HIPFFT_FORWARD));
+    } else if (complex_input && !complex_output) {
+      CUFFT_CHECK(hipfftExecZ2D(plan, static_cast<hipfftDoubleComplex*>(input.data_ptr()),
+        static_cast<hipfftDoubleReal*>(output.data_ptr())));
+    } else if (!complex_input && complex_output) {
+      CUFFT_CHECK(hipfftExecD2Z(plan, static_cast<hipfftDoubleReal*>(input.data_ptr()),
+        static_cast<hipfftDoubleComplex*>(output.data_ptr())));
+    } else {
+      throw std::runtime_error("hipFFT doesn't support r2r (double)");
+    }
+  } else {
+    std::ostringstream ss;
+    ss << "hipFFT doesn't support tensor of type: "
+       << at::toString(input.type().scalarType());
+    throw std::runtime_error(ss.str());
+  }
+#else
   CUFFT_CHECK(cufftXtExec(plan, input.data_ptr(), output.data_ptr(),
     inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
+#endif
 
   // rescale if needed by normalized flag or inverse transform
   auto size_last_signal_dim = checked_signal_sizes[signal_ndim - 1];

View File

@@ -138,8 +138,10 @@ static inline __device__ void atomicAdd(double *address, double val) {
   } while (assumed != old);
 }
 #elif !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000) || defined(__HIP_PLATFORM_HCC__)
+#if defined(__HIP_PLATFORM_HCC__) && __hcc_workweek__ < 18312
   // This needs to be defined for the host side pass
   static inline __device__ void atomicAdd(double *address, double val) { }
 #endif
+#endif
 
 #endif // THC_ATOMICS_INC

View File

@@ -4,6 +4,12 @@
 #include "THCAsmUtils.cuh"
 #include "THCDeviceUtils.cuh"
 
+#if defined(__HIP_PLATFORM_HCC__)
+#define SCAN_UTILS_WARP_SIZE 64
+#else
+#define SCAN_UTILS_WARP_SIZE 32
+#endif
+
 // Collection of in-kernel scan / prefix sum utilities
 
 // Inclusive Scan via an upsweep/downsweep mechanism. Assumes:
@@ -157,7 +163,7 @@ __device__ void inclusiveBinaryPrefixScan(T* smem, bool in, T* out, BinaryFuncti
   T index = __popc(getLaneMaskLe() & vote);
   T carry = __popc(vote);
 
-  int warp = threadIdx.x / 32;
+  int warp = threadIdx.x / SCAN_UTILS_WARP_SIZE;
 
   // Per each warp, write out a value
   if (getLaneId() == 0) {
@@ -170,7 +176,7 @@ __device__ void inclusiveBinaryPrefixScan(T* smem, bool in, T* out, BinaryFuncti
   // warp shuffle scan for CC 3.0+
   if (threadIdx.x == 0) {
     int current = 0;
-    for (int i = 0; i < blockDim.x / 32; ++i) {
+    for (int i = 0; i < blockDim.x / SCAN_UTILS_WARP_SIZE; ++i) {
       T v = smem[i];
       smem[i] = binop(smem[i], current);
       current = binop(current, v);
@@ -201,11 +207,13 @@ __device__ void exclusiveBinaryPrefixScan(T* smem, bool in, T* out, T* carry, Bi
   *out -= (T) in;
 
   // The outgoing carry for all threads is the last warp's sum
-  *carry = smem[(blockDim.x / 32) - 1];
+  *carry = smem[(blockDim.x / SCAN_UTILS_WARP_SIZE) - 1];
 
   if (KillWARDependency) {
     __syncthreads();
   }
 }
 
+#undef SCAN_UTILS_WARP_SIZE
+
 #endif // THC_SCAN_UTILS_INC

View File

@@ -213,7 +213,11 @@ __device__ DataType findPattern(DataType* smem,
                                 IndexType withinSliceStride,
                                 BitDataType desired,
                                 BitDataType desiredMask) {
+#ifdef __HIP_PLATFORM_HCC__
+  if (threadIdx.x < 64) {
+#else
   if (threadIdx.x < 32) {
+#endif
     smem[threadIdx.x] = ScalarConvert<int, DataType>::to(0);
   }
   __syncthreads();
@@ -366,7 +370,11 @@ __global__ void gatherTopK(TensorInfo<T, IndexType> input,
                            IndexType indicesWithinSliceStride) {
   // Indices are limited to integer fp precision, so counts can fit in
   // int32, regardless of IndexType
+#ifdef __HIP_PLATFORM_HCC__
+  __shared__ int smem[64];
+#else
   __shared__ int smem[32]; // one per each warp, up to warp limit
+#endif
 
   IndexType slice = getLinearBlockId<IndexType>();
   if (slice >= numInputSlices) {

View File

@@ -29,21 +29,24 @@ THC_API void THCTensor_(topk)(THCState* state,
   THCTensor_(resize)(state, topK, topKSize, {});
   THCudaLongTensor_resize(state, indices, topKSize, {});
 
+  // static_cast is required to ensure that the correct type (INDEX_T)
+  // is provided to the kernel for the arguments.
 #define RUN_K(INDEX_T, DIM, DIR) \
   gatherTopK<real, INDEX_T, DIM, DIR> \
     <<<grid, block, 0, THCState_getCurrentStream(state)>>>( \
       inputInfo, \
-      sliceSize, \
-      k, \
-      inputSlices, \
+      static_cast<INDEX_T>(sliceSize), \
+      static_cast<INDEX_T>(k), \
+      static_cast<INDEX_T>(inputSlices), \
       /* The actual dimension that the k-selection is running in */ \
       /* may have changed from collapseDims() */ \
-      inputInfo.strides[collapseInputDim], \
+      static_cast<INDEX_T>(inputInfo.strides[collapseInputDim]), \
       topKInfo, \
-      topKSlices, \
-      topKInfo.strides[collapseTopKDim], \
+      static_cast<INDEX_T>(topKSlices), \
+      static_cast<INDEX_T>(topKInfo.strides[collapseTopKDim]), \
       indicesInfo, \
-      indicesInfo.strides[collapseIndicesDim])
+      static_cast<INDEX_T>(indicesInfo.strides[collapseIndicesDim]))
 
 #define RUN_DIR(INDEX_T, DIM) \
   if (dir) { \
@@ -63,6 +66,12 @@ THC_API void THCTensor_(topk)(THCState* state,
     RUN_DIR(INDEX_T, -1); \
   }
 
+#ifdef __HIP_PLATFORM_HCC__
+#define TOPK_WARP_SIZE 64
+#else
+#define TOPK_WARP_SIZE 32
+#endif
+
 #define RUN_T(INDEX_T) \
   TensorInfo<real, INDEX_T> inputInfo = \
     getTensorInfo<real, THCTensor, INDEX_T>(state, input); \
@@ -96,7 +105,7 @@ THC_API void THCTensor_(topk)(THCState* state,
     THError("Slice to sort is too large"); \
   } \
   \
-  dim3 block(std::min(THCRoundUp(sliceSize, (int64_t) 32), (int64_t) 1024)); \
+  dim3 block(std::min(THCRoundUp(sliceSize, (int64_t) TOPK_WARP_SIZE), (int64_t) 1024)); \
   \
   /* This is used as a template parameter to calculate indices. */ \
   /* We only specialize it if all collapsed dim sizes are the */ \
@@ -124,6 +133,7 @@ THC_API void THCTensor_(topk)(THCState* state,
 #undef RUN_DIM
 #undef RUN_DIR
 #undef RUN_K
+#undef TOPK_WARP_SIZE
 
 // Sort the results if the user wants them sorted, since our
 // selection routine does not ensure sorting

View File

@@ -558,6 +558,7 @@ endif()
 if(USE_ROCM)
   include_directories(SYSTEM ${HIP_PATH}/include)
   include_directories(SYSTEM ${ROCBLAS_PATH}/include)
+  include_directories(SYSTEM ${ROCFFT_PATH}/include)
   include_directories(SYSTEM ${HIPSPARSE_PATH}/include)
   include_directories(SYSTEM ${HIPRAND_PATH}/include)
   include_directories(SYSTEM ${ROCRAND_PATH}/include)

View File

@@ -38,6 +38,13 @@ ELSE()
   SET(ROCBLAS_PATH $ENV{ROCBLAS_PATH})
 ENDIF()
 
+# ROCFFT_PATH
+IF(NOT DEFINED ENV{ROCFFT_PATH})
+  SET(ROCFFT_PATH ${ROCM_PATH}/rocfft)
+ELSE()
+  SET(ROCFFT_PATH $ENV{ROCFFT_PATH})
+ENDIF()
+
 # HIPSPARSE_PATH
 IF(NOT DEFINED ENV{HIPSPARSE_PATH})
   SET(HIPSPARSE_PATH ${ROCM_PATH}/hcsparse)
@@ -106,11 +113,13 @@ IF(HIP_FOUND)
   set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas)
   set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
   set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas)
+  set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
   set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
 
   find_package(rocrand REQUIRED)
   find_package(hiprand REQUIRED)
   find_package(rocblas REQUIRED)
+  find_package(rocfft REQUIRED)
   find_package(miopen REQUIRED)
   #find_package(hipsparse REQUIRED)

View File

@@ -5,6 +5,7 @@ set -ex
 install_ubuntu() {
   apt-get update
   apt-get install -y wget
+  apt-get install -y libopenblas-dev
 
   DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian
   # Add rocm repository
@@ -63,6 +64,15 @@ install_rocrand() {
   dpkg -i /opt/rocm/debians/rocrand.deb
 }
 
+# Install rocSPARSE/hipSPARSE that will be released soon - can co-exist w/ hcSPARSE which will be removed soon
+install_hipsparse() {
+  mkdir -p /opt/rocm/debians
+  curl https://s3.amazonaws.com/ossci-linux/rocsparse-0.1.1.0.deb -o /opt/rocm/debians/rocsparse.deb
+  curl https://s3.amazonaws.com/ossci-linux/hipsparse-0.1.1.0.deb -o /opt/rocm/debians/hipsparse.deb
+  dpkg -i /opt/rocm/debians/rocsparse.deb
+  dpkg -i /opt/rocm/debians/hipsparse.deb
+}
+
 # Install Python packages depending on the base OS
 if [ -f /etc/lsb-release ]; then
   install_ubuntu
@@ -76,3 +86,4 @@ fi
 install_hip_thrust
 install_rocrand
 install_hcsparse
+install_hipsparse

View File

@@ -929,6 +929,7 @@ if USE_ROCM:
     rocm_include_path = '/opt/rocm/include'
     hcc_include_path = '/opt/rocm/hcc/include'
     rocblas_include_path = '/opt/rocm/rocblas/include'
+    rocfft_include_path = '/opt/rocm/rocfft/include'
     hipsparse_include_path = '/opt/rocm/hcsparse/include'
     hiprand_include_path = '/opt/rocm/hiprand/include'
     rocrand_include_path = '/opt/rocm/rocrand/include'
@@ -937,6 +938,7 @@ if USE_ROCM:
     include_dirs.append(rocm_include_path)
     include_dirs.append(hcc_include_path)
     include_dirs.append(rocblas_include_path)
+    include_dirs.append(rocfft_include_path)
     include_dirs.append(hipsparse_include_path)
     include_dirs.append(hiprand_include_path)
     include_dirs.append(rocrand_include_path)

View File

@@ -2,12 +2,14 @@ r"""This file is allowed to initialize CUDA context when imported."""
 import torch
 import torch.cuda
+from common import TEST_WITH_ROCM
 
 TEST_CUDA = torch.cuda.is_available()
 TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 CUDA_DEVICE = TEST_CUDA and torch.device("cuda:0")
-TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.tensor(1., device=CUDA_DEVICE))
+# note: if ROCm is targeted, TEST_CUDNN is code for TEST_MIOPEN
+TEST_CUDNN = TEST_CUDA and (TEST_WITH_ROCM or torch.backends.cudnn.is_acceptable(torch.tensor(1., device=CUDA_DEVICE)))
 TEST_CUDNN_VERSION = TEST_CUDNN and torch.backends.cudnn.version()
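
Editor's aside, as a hedged sketch (not taken from this diff): the flags above are meant to be consumed by the test files, and with this change TEST_CUDNN now also covers the MIOpen path on ROCm. The test name and body below are purely illustrative.

# Illustrative only: gating a test on the flags defined above.
# TEST_CUDNN / TEST_WITH_ROCM come from this module; the test body is hypothetical.
import unittest
import torch

class ExampleCudnnGatedTest(unittest.TestCase):
    @unittest.skipIf(not TEST_CUDNN, "needs cuDNN (or MIOpen when built for ROCm)")
    def test_conv2d_runs_on_gpu(self):
        x = torch.randn(1, 3, 8, 8, device='cuda')
        w = torch.randn(4, 3, 3, 3, device='cuda')
        y = torch.nn.functional.conv2d(x, w)
        self.assertEqual(tuple(y.shape), (1, 4, 6, 6))
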

View File

@@ -7,7 +7,7 @@ from itertools import product
 import torch
 import torch.cuda
 from torch.nn.functional import _Reduction
-from common import TestCase, to_gpu, freeze_rng_state, is_iterable
+from common import TestCase, to_gpu, freeze_rng_state, is_iterable, TEST_WITH_ROCM
 from common_cuda import TEST_CUDA
 from torch.autograd.gradcheck import get_numerical_jacobian, iter_tensors
 import torch.backends.cudnn
@@ -40,7 +40,8 @@ module_tests = [
         module_name='Linear',
         constructor_args=(10, 8),
         input_size=(4, 10),
-        reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
+        reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='Linear',
@@ -102,17 +103,20 @@ module_tests = [
         constructor_args=(1,),
         input_size=(10, 20),
         reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1, True).expand(10, 20)),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='Softmax2d',
         input_size=(1, 3, 10, 20),
         reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1, False)),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='LogSoftmax',
         constructor_args=(1,),
         input_size=(10, 20),
         reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1, True).expand(10, 20)).log_(),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='LogSoftmax',
@@ -120,12 +124,14 @@ module_tests = [
         input_size=(1, 3, 10, 20),
         reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1, False)).log_(),
         desc='multiparam',
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='ELU',
         constructor_args=(2.,),
         input_size=(3, 2, 5),
-        reference_fn=lambda x, _: torch.where(x >= 0, x, 2 * (x.exp() - 1))
+        reference_fn=lambda x, _: torch.where(x >= 0, x, 2 * (x.exp() - 1)),
+        test_cuda=(not TEST_WITH_ROCM),
     ),
     # TODO: reference function
     dict(
@@ -198,6 +204,7 @@ module_tests = [
         input_size=(2, 3, 4),
         desc='1d_multiparam',
         reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='PReLU',
@@ -211,6 +218,7 @@ module_tests = [
         input_size=(2, 3, 4, 5),
         desc='2d_multiparam',
         reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='PReLU',
@@ -224,26 +232,31 @@ module_tests = [
         input_size=(2, 3, 4, 5, 6),
         desc='3d_multiparam',
         reference_fn=lambda i, p: torch.clamp(i, min=0) + torch.clamp(i, max=0) * p[0][0],
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='Softsign',
         input_size=(3, 2, 5),
         reference_fn=lambda i, _: i.div(1 + torch.abs(i)),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='Softmin',
         constructor_args=(1,),
         input_size=(10, 20),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
        module_name='Softmin',
        constructor_args=(1,),
        input_size=(2, 3, 5, 10),
        desc='multidim',
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='Tanhshrink',
-        input_size=(2, 3, 4, 5)
+        input_size=(2, 3, 4, 5),
+        test_cuda=(not TEST_WITH_ROCM)
     ),
 ]
 
@@ -560,6 +573,7 @@ criterion_tests = [
         reference_fn=lambda i, t, m:
             kldivloss_reference(i, t, get_reduction(m)),
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='MSELoss',
@@ -576,6 +590,7 @@ criterion_tests = [
         reference_fn=lambda i, t, m: -(t * i.log() + (1 - t) * (1 - i).log()).sum() /
             (i.numel() if get_reduction(m) else 1),
         check_gradgrad=False,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='BCELoss',
@@ -586,6 +601,7 @@ criterion_tests = [
             (i.numel() if get_reduction(m) else 1),
         desc='weights',
         check_gradgrad=False,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='CrossEntropyLoss',
@@ -606,6 +622,7 @@ criterion_tests = [
         reference_fn=lambda i, t, m:
             hingeembeddingloss_reference(i, t, reduction=get_reduction(m)),
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='HingeEmbeddingLoss',
@@ -616,6 +633,7 @@ criterion_tests = [
             hingeembeddingloss_reference(i, t, margin=0.5, reduction=get_reduction(m)),
         desc='margin',
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='MultiLabelMarginLoss',
@@ -642,6 +660,7 @@ criterion_tests = [
         target_fn=lambda: torch.rand(5, 10).mul(2).floor(),
         reference_fn=lambda i, t, m: -(t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()).sum() / i.numel(),
         check_gradgrad=False,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='MultiMarginLoss',
@@ -720,6 +739,7 @@ criterion_tests = [
         reference_fn=lambda i, t, m:
             cosineembeddingloss_reference(i[0], i[1], t, reduction=get_reduction(m)),
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='CosineEmbeddingLoss',
@@ -730,6 +750,7 @@ criterion_tests = [
             cosineembeddingloss_reference(i[0], i[1], t, margin=0.7, reduction=get_reduction(m)),
         desc='margin',
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='MarginRankingLoss',
@@ -738,6 +759,7 @@ criterion_tests = [
         reference_fn=lambda i, t, m:
             marginrankingloss_reference(i[0], i[1], t, reduction=get_reduction(m)),
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='MarginRankingLoss',
@@ -748,6 +770,7 @@ criterion_tests = [
             marginrankingloss_reference(i[0], i[1], t, margin=0.5, reduction=get_reduction(m)),
         desc='margin',
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
 ]
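
Editor's aside: the new test_cuda=(not TEST_WITH_ROCM) entries above keep these modules and criteria in the CPU test set while opting them out of GPU testing on ROCm. A hedged sketch of how a harness could interpret the flag follows; the helper is hypothetical and is not the actual common_nn.py machinery.

# Hypothetical harness-side sketch (not the real common_nn.py logic):
# an entry's test_cuda flag decides whether a CUDA/ROCm variant of the test is generated.
def make_test_variants(entry):
    variants = ['cpu']
    # Entries default to GPU testing; this PR sets test_cuda=(not TEST_WITH_ROCM)
    # on entries that do not yet pass on ROCm, so only the CPU variant is emitted there.
    if entry.get('test_cuda', True):
        variants.append('cuda')
    return variants

example = dict(module_name='Softsign', input_size=(3, 2, 5), test_cuda=False)
print(make_test_variants(example))  # -> ['cpu']
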

View File

@@ -45,14 +45,10 @@ WINDOWS_BLACKLIST = [
 ROCM_BLACKLIST = [
     'c10d',
     'cpp_extensions',
-    'cuda',
     'distributed',
     'distributions',
-    'jit',
-    'legacy_nn',
     'multiprocessing',
     'nccl',
-    'nn',
     'thd_distributed',
     'utils',
 ]
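
Editor's aside: removing 'cuda', 'jit', 'legacy_nn', and 'nn' from ROCM_BLACKLIST is what actually enables those test suites on ROCm. A hedged sketch of how such a blacklist is typically applied follows; the helper is hypothetical and is not the real run_test.py logic.

# Hypothetical sketch of applying the blacklist above to the selected test list.
def select_tests(all_tests, rocm_blacklist, running_on_rocm):
    if not running_on_rocm:
        return list(all_tests)
    return [t for t in all_tests if t not in rocm_blacklist]

ROCM_BLACKLIST_EXAMPLE = ['c10d', 'distributed']
print(select_tests(['cuda', 'nn', 'c10d'], ROCM_BLACKLIST_EXAMPLE, running_on_rocm=True))
# -> ['cuda', 'nn']  (the cuda and nn suites now run on ROCm after this PR)
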

View File

@@ -16,7 +16,7 @@ from torch._six import inf, nan
 from test_torch import TestTorch
 from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \
-    PY3, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN
+    PY3, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN, skipIfRocm, TEST_WITH_ROCM
 
 # We cannot import TEST_CUDA and TEST_MULTIGPU from common_cuda here,
 # because if we do that, the TEST_CUDNN line from common_cuda will be executed
@@ -248,45 +248,62 @@ def new_t(*sizes):
 #   - disable inplace test, if set to True, no inplace test will be done (default=False)
 #   - decorator, e.g., unittest.skipIf (default is no decorator)
 tests = [
-    ('add', small_3d, lambda t: [number(3.14, 3, t)]),
+    ('add', small_3d, lambda t: [number(3.14, 3, t)], '', types, False,
+        "skipIfRocm:ByteTensor,CharTensor,HalfTensor,ShortTensor"),
     ('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
     ('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor'),
-    ('sub', small_3d, lambda t: [number(3.14, 3, t)],),
+    ('sub', small_3d, lambda t: [number(3.14, 3, t)], '', types, False,
+        "skipIfRocm:ByteTensor,CharTensor,HalfTensor,ShortTensor"),
    ('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
-    ('mul', small_3d, lambda t: [number(3.14, 3, t)],),
+    ('mul', small_3d, lambda t: [number(3.14, 3, t)], '', types, False,
+        "skipIfRocm:ByteTensor,CharTensor,HalfTensor,ShortTensor"),
     ('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
-    ('div', small_3d, lambda t: [number(3.14, 3, t)],),
+    ('div', small_3d, lambda t: [number(3.14, 3, t)], '', types, False,
+        "skipIfRocm:ByteTensor,CharTensor,FloatTensor,HalfTensor,ShortTensor"),
     ('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
-    ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types),
-    ('pow', small_3d, lambda t: [number(1., 1, t)], 'pow1', types),
-    ('pow', small_3d, lambda t: [number(2., 2, t)], 'pow2', types),
-    ('pow', small_3d, lambda t: [number(3., 3, t)], 'pow3', types),
-    ('pow', small_3d, lambda t: [number(-1., -1, t)], 'pow-1', float_types),
+    ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types, False, "skipIfRocm:HalfTensor"),
+    ('pow', small_3d, lambda t: [number(1., 1, t)], 'pow1', types, False, "skipIfRocm:HalfTensor"),
+    ('pow', small_3d, lambda t: [number(2., 2, t)], 'pow2', types, False, "skipIfRocm:HalfTensor"),
+    ('pow', small_3d, lambda t: [number(3., 3, t)], 'pow3', types, False, "skipIfRocm:HalfTensor"),
+    ('pow', small_3d, lambda t: [number(-1., -1, t)], 'pow-1', float_types, False, "skipIfRocm:HalfTensor"),
     # HalfTensor gives bad result at pow-2 with data sampled from torch.randn
-    ('pow', small_3d, lambda t: [number(-2., -2, t)], 'pow-2', float_types_no_half),
-    ('pow', small_3d, lambda t: [tensor_abs_(small_3d(t))], 'tensor', float_types),
-    ('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types),
-    ('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
-    ('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
-    ('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)],),
-    ('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
-    ('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'),
-    ('addcdiv', small_2d_lapack, lambda t: [tensor_mul(small_2d_lapack(t), 2), small_2d_lapack(t)],),
-    ('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t),
-                                            tensor_mul(small_2d_lapack(t), 2), small_2d_lapack(t)], 'scalar'),
-    ('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)],),
-    ('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'),
-    ('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)],),
-    ('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar'),
-    ('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars'),
-    ('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)],),
-    ('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar'),
-    ('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars'),
-    ('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)],),
-    ('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar'),
-    ('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars'),
+    ('pow', small_3d, lambda t: [number(-2., -2, t)], 'pow-2', float_types_no_half,
+        False, "skipIfRocm:HalfTensor,FloatTensor"),
+    ('pow', small_3d, lambda t: [tensor_abs_(small_3d(t))], 'tensor', float_types, False, "skipIfRocm:HalfTensor"),
+    ('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types, False, "skipIfRocm:HalfTensor"),
+    ('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)], '', types, False, "skipIfRocm:HalfTensor"),
+    ('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addcdiv', small_2d_lapack, lambda t: [tensor_mul(small_2d_lapack(t), 2), small_2d_lapack(t)], '',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), tensor_mul(small_2d_lapack(t), 2), small_2d_lapack(t)],
+        'scalar', types, False, "skipIfRocm:HalfTensor"),
+    ('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)], '', types, False, "skipIfRocm:HalfTensor"),
+    ('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)], '', types, False, "skipIfRocm:HalfTensor"),
+    ('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], '', types, False, "skipIfRocm:HalfTensor"),
+    ('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)], '', types, False, "skipIfRocm:HalfTensor"),
+    ('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar',
+        types, False, "skipIfRocm:HalfTensor"),
+    ('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars',
+        types, False, "skipIfRocm:HalfTensor"),
     ('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types + [torch.HalfTensor]),
-    ('fmod', small_3d, lambda t: [3], 'value'),
+    ('fmod', small_3d, lambda t: [3], 'value', types, False, "skipIfRocm:HalfTensor"),
     ('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
     ('chunk', medium_2d, lambda t: [4],),
     ('chunk', medium_2d, lambda t: [4, 1], 'dim'),
@@ -296,15 +313,15 @@ tests = [
     ('clone', medium_2d, lambda t: [],),
     ('contiguous', medium_2d, lambda t: [],),
     ('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)],),
-    ('cumprod', small_3d, lambda t: [1],),
-    ('cumprod', small_3d, lambda t: [-1], 'neg_dim'),
-    ('cumsum', small_3d, lambda t: [1],),
-    ('cumsum', small_3d, lambda t: [-1], 'neg_dim'),
+    ('cumprod', small_3d, lambda t: [1], '', types, False, "skipIfRocm:HalfTensor"),
+    ('cumprod', small_3d, lambda t: [-1], 'neg_dim', types, False, "skipIfRocm:HalfTensor"),
+    ('cumsum', small_3d, lambda t: [1], '', types, False, "skipIfRocm:HalfTensor"),
+    ('cumsum', small_3d, lambda t: [-1], 'neg_dim', types, False, "skipIfRocm:HalfTensor"),
     ('dim', small_3d, lambda t: [],),
-    ('dist', small_2d, lambda t: [small_2d(t)],),
-    ('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'),
-    ('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm'),
-    ('dot', medium_1d, lambda t: [medium_1d(t)],),
+    ('dist', small_2d, lambda t: [small_2d(t)], '', types, False, "skipIfRocm:HalfTensor"),
+    ('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm', types, False, "skipIfRocm:HalfTensor"),
+    ('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm', types, False, "skipIfRocm:HalfTensor"),
+    ('dot', medium_1d, lambda t: [medium_1d(t)], '', types, False, "skipIfRocm:HalfTensor"),
     ('element_size', medium_1d, lambda t: [],),
     ('eq', small_3d_ones, lambda t: [small_3d(t)],),
     ('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'),
@@ -314,7 +331,7 @@ tests = [
     ('equal', small_3d_ones, lambda t: [small_3d(t)],),
     ('expand', new_t(M, 1, M), lambda t: [M, 4, M],),
     ('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)],),
-    ('fill', medium_2d, lambda t: [number(3.14, 3, t)],),
+    ('fill', medium_2d, lambda t: [number(3.14, 3, t)], '', types, False, "skipIfRocm:HalfTensor"),
     ('ge', medium_2d, lambda t: [medium_2d(t)],),
     ('le', medium_2d, lambda t: [medium_2d(t)],),
     ('gt', medium_2d, lambda t: [medium_2d(t)],),
@@ -328,31 +345,33 @@ tests = [
     ('kthvalue', small_3d_unique, lambda t: [3],),
     ('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'),
     ('kthvalue', small_3d_unique, lambda t: [3, -1], 'neg_dim'),
-    ('lerp', small_3d, lambda t: [small_3d(t), 0.3],),
-    ('max', small_3d_unique, lambda t: [],),
-    ('max', small_3d_unique, lambda t: [1], 'dim'),
-    ('max', small_3d_unique, lambda t: [-1], 'neg_dim'),
+    ('lerp', small_3d, lambda t: [small_3d(t), 0.3], '', types, False, "skipIfRocm:HalfTensor"),
+    ('max', small_3d_unique, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
+    ('max', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm),
+    ('max', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm),
     ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
-    ('min', small_3d_unique, lambda t: [],),
-    ('min', small_3d_unique, lambda t: [1], 'dim'),
-    ('min', small_3d_unique, lambda t: [-1], 'neg_dim'),
+    ('min', small_3d_unique, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
+    ('min', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm),
+    ('min', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm),
     ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'),
-    ('mean', small_3d, lambda t: [],),
-    ('mean', small_3d, lambda t: [-1], 'neg_dim'),
-    ('mean', small_3d, lambda t: [1], 'dim'),
-    ('mode', small_3d, lambda t: [],),
-    ('mode', small_3d, lambda t: [1], 'dim'),
-    ('mode', small_3d, lambda t: [-1], 'neg_dim'),
-    ('mvlgamma', lambda t: tensor_clamp(small_2d(t), 0.1, 10), lambda t: [1], '2d_p=1', float_types_no_half),
-    ('mvlgamma', lambda t: tensor_clamp(small_2d(t), 0.6, 10), lambda t: [2], '2d_p=2', float_types_no_half),
-    ('remainder', small_3d, lambda t: [3], 'value'),
+    ('mean', small_3d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
+    ('mean', small_3d, lambda t: [-1], 'neg_dim', types, False, "skipIfRocm:DoubleTensor,FloatTensor,HalfTensor"),
+    ('mean', small_3d, lambda t: [1], 'dim', types, False, "skipIfRocm:DoubleTensor,FloatTensor,HalfTensor"),
+    ('mode', small_3d, lambda t: [], '', types, False, skipIfRocm),
+    ('mode', small_3d, lambda t: [1], 'dim', types, False, skipIfRocm),
+    ('mode', small_3d, lambda t: [-1], 'neg_dim', types, False, skipIfRocm),
+    ('mvlgamma', lambda t: tensor_clamp(small_2d(t), 0.1, 10), lambda t: [1], '2d_p=1', float_types_no_half,
+        False, "skipIfRocm:DoubleTensor,FloatTensor"),
+    ('mvlgamma', lambda t: tensor_clamp(small_2d(t), 0.6, 10), lambda t: [2], '2d_p=2', float_types_no_half,
+        False, "skipIfRocm:DoubleTensor,FloatTensor"),
+    ('remainder', small_3d, lambda t: [3], 'value', types, False, "skipIfRocm:HalfTensor"),
     ('remainder', small_3d, lambda t: [-3], 'negative_value', signed_types),
     ('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
     ('remainder', small_3d, lambda t: [constant_tensor_sub(0, small_3d_positive(t))], 'negative_tensor', signed_types),
-    ('std', small_3d, lambda t: [],),
+    ('std', small_3d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
     ('std', small_3d, lambda t: [1], 'dim'),
     ('std', small_3d, lambda t: [-1], 'neg_dim'),
-    ('var', small_3d, lambda t: [],),
+    ('var', small_3d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
     ('var', small_3d, lambda t: [1], 'dim'),
     ('var', small_3d, lambda t: [-1], 'neg_dim'),
     ('ndimension', small_3d, lambda t: [],),
@@ -360,34 +379,37 @@ tests = [
     ('numel', small_3d, lambda t: [],),
     ('narrow', small_3d, lambda t: [1, 3, 2],),
     ('narrow', small_3d, lambda t: [-1, 3, 2], 'neg_dim'),
-    ('nonzero', small_3d, lambda t: [],),
-    ('norm', small_3d, lambda t: [],),
-    ('norm', small_3d, lambda t: [3], '3_norm'),
-    ('norm', small_3d, lambda t: [3, 0], '3_norm_dim'),
-    ('norm', small_3d, lambda t: [3, -2], '3_norm_neg_dim'),
+    ('nonzero', small_3d, lambda t: [], '', types, False, skipIfRocm),
+    ('norm', small_3d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
+    ('norm', small_3d, lambda t: [3], '3_norm', types, False, "skipIfRocm:HalfTensor"),
+    ('norm', small_3d, lambda t: [3, 0], '3_norm_dim', types, False, "skipIfRocm:HalfTensor,DoubleTensor,FloatTensor"),
+    ('norm', small_3d, lambda t: [3, -2], '3_norm_neg_dim', types,
+        False, "skipIfRocm:HalfTensor,DoubleTensor,FloatTensor"),
     ('ones', small_3d, lambda t: [1, 2, 3, 4, 5],),
     ('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],),
-    ('put_', new_t(2, 5, 3), lambda t: [long_type(t)([[0], [-2]]), t([[3], [4]])],),
+    ('put_', new_t(2, 5, 3), lambda t: [long_type(t)([[0], [-2]]), t([[3], [4]])], '', types, False, skipIfRocm),
     ('put_', new_t(2, 3), lambda t: [long_type(t)([]), t([])], 'empty'),
     ('put_', new_t(2, 2), lambda t: [long_type(t)([[1], [-3]]), t([[1], [2]]), True], 'accumulate'),
-    ('prod', small_2d_oneish, lambda t: [],),
-    ('prod', small_3d, lambda t: [1], 'dim'),
-    ('prod', small_3d, lambda t: [-1], 'neg_dim'),
-    ('sum', small_2d, lambda t: [],),
-    ('sum', small_3d, lambda t: [1], 'dim'),
-    ('sum', small_3d, lambda t: [-1], 'neg_dim'),
-    ('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'),
-    ('renorm', small_3d, lambda t: [2, -1, 1], '2_norm_neg_dim'),
-    ('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'),
+    ('prod', small_2d_oneish, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
+    ('prod', small_3d, lambda t: [1], 'dim', types, False, skipIfRocm),
+    ('prod', small_3d, lambda t: [-1], 'neg_dim', types, False, skipIfRocm),
+    ('sum', small_2d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
+    ('sum', small_3d, lambda t: [1], 'dim', types, False, skipIfRocm),
+    ('sum', small_3d, lambda t: [-1], 'neg_dim', types, False, skipIfRocm),
+    ('renorm', small_3d, lambda t: [2, 1, 1], '2_norm', types, False, "skipIfRocm:HalfTensor,DoubleTensor,FloatTensor"),
+    ('renorm', small_3d, lambda t: [2, -1, 1], '2_norm_neg_dim', types,
+        False, "skipIfRocm:HalfTensor,DoubleTensor,FloatTensor"),
+    ('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm', types,
+        False, "skipIfRocm:HalfTensor,DoubleTensor,FloatTensor"),
     ('repeat', small_2d, lambda t: [2, 2, 2],),
     ('size', new_t(1, 2, 3, 4), lambda t: [],),
     ('size', new_t(1, 2, 3, 4), lambda t: [1], 'dim'),
     ('size', new_t(1, 2, 3, 4), lambda t: [-2], 'neg_dim'),
-    ('sort', small_3d_unique, lambda t: [],),
-    ('sort', small_3d_unique, lambda t: [1], 'dim'),
-    ('sort', small_3d_unique, lambda t: [-1], 'neg_dim'),
-    ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'),
-    ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending'),
+    ('sort', small_3d_unique, lambda t: [], '', types, False, skipIfRocm),
+    ('sort', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm),
+    ('sort', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm),
+    ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending', types, False, skipIfRocm),
+    ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending', types, False, skipIfRocm),
     ('split', small_3d, lambda t: [2],),
     ('split', small_3d, lambda t: [2, 1], 'dim'),
     ('split', small_3d, lambda t: [2, -3], 'neg_dim'),
@@ -395,14 +417,14 @@ tests = [
     ('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'),
     ('squeeze', new_t(1, 2, 1, 4), lambda t: [-2], 'neg_dim'),
     ('t', new_t(1, 2), lambda t: [],),
-    ('take', new_t(3, 4), lambda t: [long_type(t)([[0], [-2]])],),
+    ('take', new_t(3, 4), lambda t: [long_type(t)([[0], [-2]])], '', types, False, skipIfRocm),
     ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],),
     ('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'),
     ('to_list', small_3d, lambda t: [],),
-    ('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort'),
-    ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort'),
-    ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'),
-    ('trace', medium_2d, lambda t: [],),
+    ('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort', types, False, skipIfRocm),
+    ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort', types, False, skipIfRocm),
+    ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort', types, False, skipIfRocm),
+    ('trace', medium_2d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"),
     ('tril', medium_2d, lambda t: [],),
     ('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
     ('tril', medium_2d, lambda t: [2], 'positive'),
@@ -443,7 +465,7 @@ tests = [
      unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
     ('qr', large_2d_lapack, lambda t: [], 'big', float_types, False,
      unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
-    ('inverse', new_t(20, 20), lambda t: [], None, float_types, False),
+    ('inverse', new_t(20, 20), lambda t: [], None, float_types, False, "skipIfRocm:DoubleTensor,FloatTensor"),
    ('geqrf', new_t(20, 20), lambda t: [], None, float_types, False,
      unittest.skipIf(not TEST_MAGMA, "no MAGMA library detected")),
     ('svd', new_t(10, 10), lambda t: [], 'square', float_types_no_half, False,
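
Editor's aside on the extended tuple format used above: each entry now carries an explicit description, a type list, the no-inplace flag, and either a decorator (e.g. skipIfRocm) or a string of the form "skipIfRocm:Type1,Type2" that skips only the listed tensor types on ROCm. A hedged sketch of how such a string could be turned into a per-type decorator follows; the helper is illustrative only, not the actual test_cuda.py generator.

# Hedged sketch of interpreting a "skipIfRocm:<Type1>,<Type2>" entry.
import unittest

def decorator_for(entry_decorator, tensor_type_name, running_on_rocm):
    if isinstance(entry_decorator, str) and entry_decorator.startswith('skipIfRocm:'):
        skipped_types = entry_decorator.split(':', 1)[1].split(',')
        if running_on_rocm and tensor_type_name in skipped_types:
            return unittest.skip('not yet working on ROCm for ' + tensor_type_name)
        return lambda fn: fn  # no-op decorator for types that still run
    return entry_decorator or (lambda fn: fn)

deco = decorator_for("skipIfRocm:HalfTensor", "HalfTensor", running_on_rocm=True)
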
@@ -760,6 +782,7 @@ class TestCuda(TestCase):
             pass
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_memory_stats_multigpu(self):
         # advance a generator with a end flag
         def advance(gen, end):
@@ -797,6 +820,7 @@ class TestCuda(TestCase):
             t += 1
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_autogpu(self):
         x = torch.randn(5, 5).cuda()
         y = torch.randn(5, 5).cuda()
@@ -814,6 +838,7 @@ class TestCuda(TestCase):
         self.assertEqual(z.get_device(), 0)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_new(self):
         x = torch.randn(3, 3).cuda()
         self.assertEqual(x.new([0, 1, 2]).get_device(), 0)
@@ -824,6 +849,7 @@ class TestCuda(TestCase):
         self.assertEqual(x.new([0, 1, 2], device=1).get_device(), 1)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_copy_device(self):
         x = torch.randn(5, 5).cuda()
         with torch.cuda.device(1):
@@ -877,6 +903,7 @@ class TestCuda(TestCase):
         self.assertIsInstance(y.cuda().float().cpu().int(), torch.IntStorage)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_type_conversions_same_gpu(self):
         x = torch.randn(5, 5).cuda(1)
         self.assertEqual(x.int().get_device(), 1)
@@ -896,12 +923,15 @@ class TestCuda(TestCase):
                 if input.is_cuda and input.get_device() == i:
                     self.assertEqual(t.data_ptr(), input.data_ptr())
 
+    @skipIfRocm
     def test_broadcast_cpu(self):
         self._test_broadcast(torch.randn(5, 5))
 
+    @skipIfRocm
     def test_broadcast_gpu(self):
         self._test_broadcast(torch.randn(5, 5).cuda())
 
+    @skipIfRocm
     def test_min_max_nan(self):
         tests = [(lambda x: x.min(), 'min'),
                  (lambda x: x.max(), 'max'),
@@ -932,6 +962,7 @@ class TestCuda(TestCase):
             self.assertIsInstance(bct, type(bt))
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_broadcast_coalesced(self):
         numel = 5
         num_bytes = numel * 8
@@ -952,6 +983,7 @@ class TestCuda(TestCase):
         self._test_broadcast_coalesced(self, tensors, num_bytes * 5 // 2)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_broadcast_coalesced_dense_only(self):
         numel = 5
         num_bytes = numel * 8
@@ -966,6 +998,7 @@ class TestCuda(TestCase):
         self._test_broadcast_coalesced(self, tensors, num_bytes * 5 // 2)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_reduce_add(self):
         x = torch.randn(5, 5)
         y = torch.randn(5, 5)
@@ -992,6 +1025,7 @@ class TestCuda(TestCase):
         self.assertEqual(rc.type(), r.type())
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_reduce_add_coalesced(self):
         numel = 5
         num_bytes = numel * 8
@@ -1012,6 +1046,7 @@ class TestCuda(TestCase):
         self._test_reduce_add_coalesced(self, tensors, num_bytes * 5 // 2)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_reduce_add_coalesced_dense_only(self):
         numel = 5
         num_bytes = numel * 8
@@ -1083,12 +1118,15 @@ class TestCuda(TestCase):
             index[dim] = slice(x.size(dim), x.size(dim) + y.size(dim))
             self.assertEqual(result[tuple(index)], y)
 
+    @skipIfRocm
     def test_gather(self):
         self._test_gather(0)
 
+    @skipIfRocm
     def test_gather_dim(self):
         self._test_gather(1)
 
+    @skipIfRocm
     def test_from_sequence(self):
         seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)]
         reference = torch.arange(0, 20).resize_(5, 4)
@@ -1119,6 +1157,7 @@ class TestCuda(TestCase):
         self.assertEqual(torch.cuda.initial_seed(), 2)
 
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
+    @skipIfRocm
     def test_cat_autogpu(self):
         x = torch.randn(4, 4).cuda(1)
         y = torch.randn(4, 4).cuda(1)
@@ -1146,9 +1185,11 @@ class TestCuda(TestCase):
         z = torch.cat([x, y])
         self.assertEqual(z.size(), (21, SIZE, SIZE))
 
+    @skipIfRocm
     def test_cat_empty_legacy(self):
         TestTorch._test_cat_empty_legacy(self, use_cuda=True)
 
+    @skipIfRocm
     def test_cat_empty(self):
         TestTorch._test_cat_empty(self, use_cuda=True)
 
@@ -1203,6 +1244,7 @@ class TestCuda(TestCase):
         self.assertEqual(copy.get_device(), original.get_device())
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
+    @skipIfRocm
     def test_multigpu_serialization(self):
         x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
         with tempfile.NamedTemporaryFile() as f:
@@ -1215,6 +1257,7 @@ class TestCuda(TestCase):
         self.assertEqual(copy.get_device(), original.get_device())
self.assertEqual(copy.get_device(), original.get_device()) self.assertEqual(copy.get_device(), original.get_device())
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU") @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@skipIfRocm
def test_multigpu_serialization_remap(self): def test_multigpu_serialization_remap(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)] x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
@ -1233,6 +1276,7 @@ class TestCuda(TestCase):
self.assertEqual(copy.get_device(), 0) self.assertEqual(copy.get_device(), 0)
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU") @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@skipIfRocm
def test_multigpu_serialization_remap_dict(self): def test_multigpu_serialization_remap_dict(self):
x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)] x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)]
with tempfile.NamedTemporaryFile() as f: with tempfile.NamedTemporaryFile() as f:
@ -1245,6 +1289,7 @@ class TestCuda(TestCase):
self.assertEqual(copy.get_device(), 0) self.assertEqual(copy.get_device(), 0)
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU") @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@skipIfRocm
def test_cuda_set_device(self): def test_cuda_set_device(self):
x = torch.randn(5, 5) x = torch.randn(5, 5)
with torch.cuda.device(1): with torch.cuda.device(1):
@ -1266,6 +1311,7 @@ class TestCuda(TestCase):
def test_cuda_synchronize(self): def test_cuda_synchronize(self):
torch.cuda.synchronize() torch.cuda.synchronize()
@skipIfRocm
def test_streams(self): def test_streams(self):
default_stream = torch.cuda.current_stream() default_stream = torch.cuda.current_stream()
user_stream = torch.cuda.Stream() user_stream = torch.cuda.Stream()
@ -1284,6 +1330,7 @@ class TestCuda(TestCase):
self.assertTrue(default_stream.query()) self.assertTrue(default_stream.query())
@unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU") @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
@skipIfRocm
def test_streams_multi_gpu(self): def test_streams_multi_gpu(self):
default_stream = torch.cuda.current_stream() default_stream = torch.cuda.current_stream()
self.assertEqual(default_stream.device, 0) self.assertEqual(default_stream.device, 0)
@ -1294,6 +1341,7 @@ class TestCuda(TestCase):
self.assertNotEqual(torch.cuda.current_stream(), default_stream) self.assertNotEqual(torch.cuda.current_stream(), default_stream)
@unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported") @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
@skipIfRocm
def test_tensor_device(self): def test_tensor_device(self):
self.assertEqual(torch.cuda.FloatTensor(1).get_device(), 0) self.assertEqual(torch.cuda.FloatTensor(1).get_device(), 0)
self.assertEqual(torch.cuda.FloatTensor(1, device=1).get_device(), 1) self.assertEqual(torch.cuda.FloatTensor(1, device=1).get_device(), 1)
@ -1302,6 +1350,7 @@ class TestCuda(TestCase):
self.assertEqual(torch.cuda.FloatTensor(1, device=0).get_device(), 0) self.assertEqual(torch.cuda.FloatTensor(1, device=0).get_device(), 0)
self.assertEqual(torch.cuda.FloatTensor(1, device=None).get_device(), 1) self.assertEqual(torch.cuda.FloatTensor(1, device=None).get_device(), 1)
@skipIfRocm
def test_events(self): def test_events(self):
stream = torch.cuda.current_stream() stream = torch.cuda.current_stream()
event = torch.cuda.Event(enable_timing=True) event = torch.cuda.Event(enable_timing=True)
@ -1315,6 +1364,7 @@ class TestCuda(TestCase):
self.assertTrue(event.query()) self.assertTrue(event.query())
self.assertGreater(start_event.elapsed_time(event), 0) self.assertGreater(start_event.elapsed_time(event), 0)
@skipIfRocm
def test_record_stream(self): def test_record_stream(self):
cycles_per_ms = get_cycles_per_ms() cycles_per_ms = get_cycles_per_ms()
@ -1352,6 +1402,7 @@ class TestCuda(TestCase):
x = torch.arange(0, 10).view((2, 5)) x = torch.arange(0, 10).view((2, 5))
self.assertEqual(x.t(), x.t().pin_memory()) self.assertEqual(x.t(), x.t().pin_memory())
@skipIfRocm
def test_caching_pinned_memory(self): def test_caching_pinned_memory(self):
cycles_per_ms = get_cycles_per_ms() cycles_per_ms = get_cycles_per_ms()
@ -1372,6 +1423,7 @@ class TestCuda(TestCase):
self.assertEqual(list(gpu_tensor), [1]) self.assertEqual(list(gpu_tensor), [1])
@unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected") @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
@skipIfRocm
def test_caching_pinned_memory_multi_gpu(self): def test_caching_pinned_memory_multi_gpu(self):
# checks that the events preventing pinned memory from being re-used # checks that the events preventing pinned memory from being re-used
# too early are recorded on the correct GPU # too early are recorded on the correct GPU
@ -1432,6 +1484,7 @@ class TestCuda(TestCase):
def test_signal_window_functions(self): def test_signal_window_functions(self):
TestTorch._test_signal_window_functions(self, device=torch.device('cuda')) TestTorch._test_signal_window_functions(self, device=torch.device('cuda'))
@skipIfRocm
def test_fft_ifft_rfft_irfft(self): def test_fft_ifft_rfft_irfft(self):
TestTorch._test_fft_ifft_rfft_irfft(self, device=torch.device('cuda')) TestTorch._test_fft_ifft_rfft_irfft(self, device=torch.device('cuda'))
@ -1463,6 +1516,7 @@ class TestCuda(TestCase):
def test_stft(self): def test_stft(self):
TestTorch._test_stft(self, device=torch.device('cuda')) TestTorch._test_stft(self, device=torch.device('cuda'))
@skipIfRocm
def test_multinomial(self): def test_multinomial(self):
TestTorch._test_multinomial(self, torch.cuda.FloatTensor) TestTorch._test_multinomial(self, torch.cuda.FloatTensor)
@ -1528,6 +1582,7 @@ class TestCuda(TestCase):
self._spawn_method(test_method, torch.Tensor([0, -inf])) self._spawn_method(test_method, torch.Tensor([0, -inf]))
self._spawn_method(test_method, torch.Tensor([0, nan])) self._spawn_method(test_method, torch.Tensor([0, nan]))
@skipIfRocm
def test_broadcast(self): def test_broadcast(self):
TestTorch._test_broadcast(self, lambda t: t.cuda()) TestTorch._test_broadcast(self, lambda t: t.cuda())
@ -1540,12 +1595,15 @@ class TestCuda(TestCase):
def test_broadcast_batched_matmul(self): def test_broadcast_batched_matmul(self):
TestTorch._test_broadcast_batched_matmul(self, lambda t: t.cuda()) TestTorch._test_broadcast_batched_matmul(self, lambda t: t.cuda())
@skipIfRocm
def test_index(self): def test_index(self):
TestTorch._test_index(self, lambda t: t.cuda()) TestTorch._test_index(self, lambda t: t.cuda())
@skipIfRocm
def test_advancedindex(self): def test_advancedindex(self):
TestTorch._test_advancedindex(self, lambda t: t.cuda()) TestTorch._test_advancedindex(self, lambda t: t.cuda())
@skipIfRocm
def test_advancedindex_mixed_cpu_cuda(self): def test_advancedindex_mixed_cpu_cuda(self):
def test(x, ia, ib): def test(x, ia, ib):
# test getitem # test getitem
@ -1594,30 +1652,37 @@ class TestCuda(TestCase):
ib = ib.to(other_device) ib = ib.to(other_device)
test(x, ia, ib) test(x, ia, ib)
@skipIfRocm
def test_advancedindex_big(self): def test_advancedindex_big(self):
TestTorch._test_advancedindex_big(self, lambda t: t.cuda()) TestTorch._test_advancedindex_big(self, lambda t: t.cuda())
@skipIfRocm
def test_btrifact(self): def test_btrifact(self):
TestTorch._test_btrifact(self, lambda t: t.cuda()) TestTorch._test_btrifact(self, lambda t: t.cuda())
@skipIfRocm
def test_btrisolve(self): def test_btrisolve(self):
TestTorch._test_btrisolve(self, lambda t: t.cuda()) TestTorch._test_btrisolve(self, lambda t: t.cuda())
@skipIfRocm
def test_dim_reduction(self): def test_dim_reduction(self):
TestTorch._test_dim_reduction(self, lambda t: t.cuda()) TestTorch._test_dim_reduction(self, lambda t: t.cuda())
@skipIfRocm
def test_tensor_gather(self): def test_tensor_gather(self):
TestTorch._test_gather(self, lambda t: t.cuda(), False) TestTorch._test_gather(self, lambda t: t.cuda(), False)
def test_tensor_scatter(self): def test_tensor_scatter(self):
TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', test_bounds=False) TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', test_bounds=False)
@skipIfRocm
def test_tensor_scatterAdd(self): def test_tensor_scatterAdd(self):
TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_add_', test_bounds=False) TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_add_', test_bounds=False)
def test_tensor_scatterFill(self): def test_tensor_scatterFill(self):
TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False) TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False)
@skipIfRocm
def test_min_max_inits(self): def test_min_max_inits(self):
# Testing if THC_reduceAll received the correct index initialization. # Testing if THC_reduceAll received the correct index initialization.
# This affects the result of THC_reduceAll operations at extreme values # This affects the result of THC_reduceAll operations at extreme values
@ -1692,6 +1757,7 @@ class TestCuda(TestCase):
tensor = tensor.unsqueeze(1) tensor = tensor.unsqueeze(1)
self.assertEqual(tensor.var(0), 0.03125) self.assertEqual(tensor.var(0), 0.03125)
@skipIfRocm
def test_digamma(self): def test_digamma(self):
def test(use_double=False): def test(use_double=False):
cpu_tensor = torch.randn(10, 10, 10) cpu_tensor = torch.randn(10, 10, 10)
@ -1720,6 +1786,7 @@ class TestCuda(TestCase):
norm_errors = (gpu_out - cpu_out.cuda()) / gpu_out norm_errors = (gpu_out - cpu_out.cuda()) / gpu_out
self.assertEqual(norm_errors, expected_errors) self.assertEqual(norm_errors, expected_errors)
@skipIfRocm
def test_polygamma(self): def test_polygamma(self):
def test(use_double=False): def test(use_double=False):
cpu_tensor = torch.randn(10, 10, 10) cpu_tensor = torch.randn(10, 10, 10)
@ -1771,6 +1838,7 @@ class TestCuda(TestCase):
TestTorch._test_trtrs(self, lambda t: t.cuda()) TestTorch._test_trtrs(self, lambda t: t.cuda())
@unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected") @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
@skipIfRocm
def test_get_set_rng_state_all(self): def test_get_set_rng_state_all(self):
states = torch.cuda.get_rng_state_all() states = torch.cuda.get_rng_state_all()
before0 = torch.cuda.FloatTensor(100, device=0).normal_() before0 = torch.cuda.FloatTensor(100, device=0).normal_()
@ -1781,12 +1849,14 @@ class TestCuda(TestCase):
self.assertEqual(before0, after0, 0) self.assertEqual(before0, after0, 0)
self.assertEqual(before1, after1, 0) self.assertEqual(before1, after1, 0)
@skipIfRocm
def test_nvtx(self): def test_nvtx(self):
# Just making sure we can see the symbols # Just making sure we can see the symbols
torch.cuda.nvtx.range_push("foo") torch.cuda.nvtx.range_push("foo")
torch.cuda.nvtx.mark("bar") torch.cuda.nvtx.mark("bar")
torch.cuda.nvtx.range_pop() torch.cuda.nvtx.range_pop()
@skipIfRocm
def test_randperm_cuda(self): def test_randperm_cuda(self):
cuda = torch.device('cuda:0') cuda = torch.device('cuda:0')
@ -1825,6 +1895,7 @@ class TestCuda(TestCase):
def test_random_neg_values(self): def test_random_neg_values(self):
TestTorch._test_random_neg_values(self, use_cuda=True) TestTorch._test_random_neg_values(self, use_cuda=True)
@skipIfRocm
def test_bincount_cuda(self): def test_bincount_cuda(self):
TestTorch._test_bincount(self, device='cuda') TestTorch._test_bincount(self, device='cuda')
# ensure CUDA code coverage # ensure CUDA code coverage
@ -1846,6 +1917,7 @@ class TestCuda(TestCase):
self.assertEqual(t.cpu().bincount(), t.bincount()) self.assertEqual(t.cpu().bincount(), t.bincount())
self.assertEqual(t.cpu().bincount(w_cpu), t.bincount(w)) self.assertEqual(t.cpu().bincount(w_cpu), t.bincount(w))
@skipIfRocm
def test_tiny_half_norm_(self): def test_tiny_half_norm_(self):
a = torch.arange(25).cuda().float() a = torch.arange(25).cuda().float()
a /= 100000000 a /= 100000000
@ -1909,6 +1981,25 @@ def generate_tests():
if t not in type_subset: if t not in type_subset:
continue continue
if TEST_WITH_ROCM and decorator is not None:
if (isinstance(decorator, str)):
tensor_type_name = str(t.__name__)
decorator_list = decorator.split(":")
skip_type_list = decorator_list[1].split(",")
if (("ByteTensor" in skip_type_list) and tensor_type_name == "ByteTensor") \
or (("CharTensor" in skip_type_list) and tensor_type_name == "CharTensor") \
or (("DoubleTensor" in skip_type_list) and tensor_type_name == "DoubleTensor") \
or (("FloatTensor" in skip_type_list) and tensor_type_name == "FloatTensor") \
or (("HalfTensor" in skip_type_list) and tensor_type_name == "HalfTensor") \
or (("IntTensor" in skip_type_list) and tensor_type_name == "IntTensor") \
or (("LongTensor" in skip_type_list) and tensor_type_name == "LongTensor") \
or (("ShortTensor" in skip_type_list) and tensor_type_name == "ShortTensor"):
decorator = skipIfRocm
else:
decorator = None
elif ((not TEST_WITH_ROCM) and (decorator is not None)):
if (isinstance(decorator, str)):
decorator = None
precision = custom_precision.get(name, TestCuda.precision) precision = custom_precision.get(name, TestCuda.precision)
if is_half(t): if is_half(t):
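
The string-form decorators resolved above encode the tensor types to skip on ROCm, e.g. a hypothetical "skipIfRocm:HalfTensor,DoubleTensor". A compact, equivalent restatement of that resolution (assuming t only ever ranges over the eight CUDA tensor classes named in the chained checks):

    # Sketch: pick skipIfRocm only for the listed tensor types, else drop it.
    if decorator is not None and isinstance(decorator, str):
        skip_type_list = decorator.split(":")[1].split(",")
        if TEST_WITH_ROCM and t.__name__ in skip_type_list:
            decorator = skipIfRocm
        else:
            decorator = None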

View File

@ -608,6 +608,7 @@ class TestJit(JitTestCase):
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
@skipIfRocm
def test_fusion_rand(self): def test_fusion_rand(self):
class M(torch.jit.ScriptModule): class M(torch.jit.ScriptModule):
__constants__ = ['d'] __constants__ = ['d']
@ -631,6 +632,7 @@ class TestJit(JitTestCase):
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
@skipIfRocm
def test_fusion_arg_configurations(self): def test_fusion_arg_configurations(self):
# A smoke test to make sure we won't use the same kernel for contiguous # A smoke test to make sure we won't use the same kernel for contiguous
# and non-contiguous arguments. # and non-contiguous arguments.
@ -846,6 +848,7 @@ class TestJit(JitTestCase):
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@unittest.skipIf(not RUN_CUDA_MULTI_GPU, "needs non-zero device") @unittest.skipIf(not RUN_CUDA_MULTI_GPU, "needs non-zero device")
@skipIfRocm
def test_fuse_last_device(self): def test_fuse_last_device(self):
device = 'cuda:' + str(1) device = 'cuda:' + str(1)
x = torch.tensor([0.4], dtype=torch.float, device=device) x = torch.tensor([0.4], dtype=torch.float, device=device)
@ -2521,6 +2524,7 @@ a")
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@unittest.skipIf(not RUN_CUDA, "No CUDA") @unittest.skipIf(not RUN_CUDA, "No CUDA")
@skipIfRocm
def test_chunk_fusion_cuda(self): def test_chunk_fusion_cuda(self):
def fn(x): def fn(x):
a, b, c = x.chunk(3, 1) a, b, c = x.chunk(3, 1)
@ -2536,6 +2540,7 @@ a")
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@unittest.skipIf(not RUN_CUDA, "No CUDA") @unittest.skipIf(not RUN_CUDA, "No CUDA")
@skipIfRocm
def test_chunk_multiple_fusion_cuda(self): def test_chunk_multiple_fusion_cuda(self):
# The arguments are intentionally used out of order as a test to see # The arguments are intentionally used out of order as a test to see
# if the fusion compiler adds extra args in the correct order # if the fusion compiler adds extra args in the correct order
@ -2589,11 +2594,13 @@ a")
self.checkScript(fn, [tensor]) self.checkScript(fn, [tensor])
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@skipIfRocm
def test_chunk_fusion_correctness(self): def test_chunk_fusion_correctness(self):
return self._test_chunk_fusion_correctness(self, 'cpu') return self._test_chunk_fusion_correctness(self, 'cpu')
@unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows") @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
@unittest.skipIf(not RUN_CUDA, "No CUDA") @unittest.skipIf(not RUN_CUDA, "No CUDA")
@skipIfRocm
def test_chunk_fusion_correctness_cuda(self): def test_chunk_fusion_correctness_cuda(self):
return self._test_chunk_fusion_correctness(self, 'cuda') return self._test_chunk_fusion_correctness(self, 'cuda')
@ -6457,6 +6464,7 @@ class TestEndToEndHybridFrontendModels(JitTestCase):
self.checkTrace(Policy(), (torch.rand(1, 4),)) self.checkTrace(Policy(), (torch.rand(1, 4),))
@skipIfRocm
def test_snli(self): def test_snli(self):
# TODO: # TODO:
# 1) nn.LSTM is called as a Python function https://github.com/pytorch/pytorch/issues/8449 # 1) nn.LSTM is called as a Python function https://github.com/pytorch/pytorch/issues/8449
@ -6549,6 +6557,7 @@ class TestEndToEndHybridFrontendModels(JitTestCase):
self.checkTrace(SNLIClassifier(Config()), (premise, hypothesis), inputs_require_grads=False) self.checkTrace(SNLIClassifier(Config()), (premise, hypothesis), inputs_require_grads=False)
@skipIfRocm
def test_super_resolution(self): def test_super_resolution(self):
import torch.nn.init as init import torch.nn.init as init
@ -6704,6 +6713,7 @@ class TestPytorchExportModes(JitTestCase):
export_type=torch.onnx.ExportTypes.DIRECTORY) export_type=torch.onnx.ExportTypes.DIRECTORY)
shutil.rmtree(d) shutil.rmtree(d)
@skipIfRocm
def test_aten_fallback(self): def test_aten_fallback(self):
class ModelWithAtenNotONNXOp(nn.Module): class ModelWithAtenNotONNXOp(nn.Module):
def forward(self, x, y): def forward(self, x, y):

View File

@ -6,7 +6,7 @@ from copy import deepcopy
import torch import torch
import torch.legacy.nn as nn import torch.legacy.nn as nn
from common import to_gpu, freeze_rng_state, run_tests from common import to_gpu, freeze_rng_state, run_tests, skipIfRocm, TEST_WITH_ROCM
from common_nn import NNTestCase, ModuleTest, CriterionTest, iter_tensors, \ from common_nn import NNTestCase, ModuleTest, CriterionTest, iter_tensors, \
module_tests, criterion_tests, PRECISION module_tests, criterion_tests, PRECISION
from torch.autograd.gradcheck import get_numerical_jacobian from torch.autograd.gradcheck import get_numerical_jacobian
@ -66,33 +66,40 @@ tests = [
constructor_args=(3.5,), constructor_args=(3.5,),
input_size=(3, 5, 4), input_size=(3, 5, 4),
reference_fn=lambda i, _: i + 3.5, reference_fn=lambda i, _: i + 3.5,
check_inplace=True), check_inplace=True,
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.BatchNormalization, OldModuleTest(nn.BatchNormalization,
constructor_args=(10,), constructor_args=(10,),
input_size=(4, 10), input_size=(4, 10),
desc='affine'), desc='affine',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.BatchNormalization, OldModuleTest(nn.BatchNormalization,
constructor_args=(10, 1e-3, 0.3, False), constructor_args=(10, 1e-3, 0.3, False),
input_size=(4, 10), input_size=(4, 10),
desc='not_affine'), desc='not_affine',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialBatchNormalization, OldModuleTest(nn.SpatialBatchNormalization,
constructor_args=(3,), constructor_args=(3,),
input_size=(2, 3, 6, 6)), input_size=(2, 3, 6, 6),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialBatchNormalization, OldModuleTest(nn.SpatialBatchNormalization,
constructor_args=(3, 1e-3, 0.8), constructor_args=(3, 1e-3, 0.8),
input_size=(2, 3, 6, 6), input_size=(2, 3, 6, 6),
desc='momentum'), desc='momentum',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialBatchNormalization, OldModuleTest(nn.SpatialBatchNormalization,
constructor_args=(3, 1e-3, 0.8, False), constructor_args=(3, 1e-3, 0.8, False),
input_size=(2, 3, 6, 6), input_size=(2, 3, 6, 6),
desc='no_affine'), desc='no_affine'),
OldModuleTest(nn.VolumetricBatchNormalization, OldModuleTest(nn.VolumetricBatchNormalization,
constructor_args=(3,), constructor_args=(3,),
input_size=(2, 3, 4, 4, 4)), input_size=(2, 3, 4, 4, 4),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.VolumetricBatchNormalization, OldModuleTest(nn.VolumetricBatchNormalization,
constructor_args=(3, 1e-3, 0.7), constructor_args=(3, 1e-3, 0.7),
input_size=(2, 3, 4, 4, 4), input_size=(2, 3, 4, 4, 4),
desc='momentum'), desc='momentum',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.VolumetricBatchNormalization, OldModuleTest(nn.VolumetricBatchNormalization,
constructor_args=(3, 1e-3, 0.7, False), constructor_args=(3, 1e-3, 0.7, False),
input_size=(2, 3, 4, 4, 4), input_size=(2, 3, 4, 4, 4),
@ -100,52 +107,67 @@ tests = [
OldModuleTest(nn.CMul, OldModuleTest(nn.CMul,
constructor_args=(5, 6), constructor_args=(5, 6),
input_size=(10, 5, 6), input_size=(10, 5, 6),
desc='3D'), desc='3D',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.CMul, OldModuleTest(nn.CMul,
constructor_args=(50, 4), constructor_args=(50, 4),
input_size=(1, 50, 4), input_size=(1, 50, 4),
desc='3D_single_example'), desc='3D_single_example',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.CMul, OldModuleTest(nn.CMul,
constructor_args=(1, 5), constructor_args=(1, 5),
input_fn=lambda: torch.randn(10, 3, 5)[:, 1], input_fn=lambda: torch.randn(10, 3, 5)[:, 1],
desc='3D_noncontiguous'), desc='3D_noncontiguous',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Exp, OldModuleTest(nn.Exp,
input_size=(2, 3, 4), input_size=(2, 3, 4),
reference_fn=lambda i, _: i.exp()), reference_fn=lambda i, _: i.exp(),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Log, OldModuleTest(nn.Log,
input_fn=lambda: torch.rand(2, 3, 2) + 0.1, input_fn=lambda: torch.rand(2, 3, 2) + 0.1,
reference_fn=lambda i, _: i.log()), reference_fn=lambda i, _: i.log(),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Clamp, OldModuleTest(nn.Clamp,
constructor_args=(-2., 5.), constructor_args=(-2., 5.),
input_fn=lambda: torch.randn(3, 2, 50) * 6, input_fn=lambda: torch.randn(3, 2, 50) * 6,
reference_fn=lambda i, _: i.clamp(-2, 5)), reference_fn=lambda i, _: i.clamp(-2, 5)),
OldModuleTest(nn.Abs, OldModuleTest(nn.Abs,
input_size=(3, 20, 5), input_size=(3, 20, 5),
reference_fn=lambda i, _: i.abs()), reference_fn=lambda i, _: i.abs(),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Bilinear, OldModuleTest(nn.Bilinear,
constructor_args=(2, 3, 10), constructor_args=(2, 3, 10),
input_size=[(4, 2), (4, 3)]), input_size=[(4, 2), (4, 3)],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Bilinear, OldModuleTest(nn.Bilinear,
constructor_args=(5, 4, 2), constructor_args=(5, 4, 2),
input_size=[(2, 5), (2, 4)], input_size=[(2, 5), (2, 4)],
desc='small_output'), desc='small_output',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Euclidean, OldModuleTest(nn.Euclidean,
constructor_args=(5, 7), constructor_args=(5, 7),
input_size=(10, 5)), input_size=(10, 5),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.WeightedEuclidean, OldModuleTest(nn.WeightedEuclidean,
constructor_args=(5, 7), constructor_args=(5, 7),
input_size=(10, 5)), input_size=(10, 5),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Cosine, OldModuleTest(nn.Cosine,
constructor_args=(5, 7), constructor_args=(5, 7),
input_size=(10, 5)), input_size=(10, 5),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.CAddTable, OldModuleTest(nn.CAddTable,
input_size=[(5, 7), (5, 7)]), input_size=[(5, 7), (5, 7)],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.CSubTable, OldModuleTest(nn.CSubTable,
input_size=[(5, 7), (5, 7)]), input_size=[(5, 7), (5, 7)],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.CDivTable, OldModuleTest(nn.CDivTable,
input_fn=lambda: [torch.randn(1, 7), torch.rand(1, 7) + 0.1]), input_fn=lambda: [torch.randn(1, 7), torch.rand(1, 7) + 0.1],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.CMulTable, OldModuleTest(nn.CMulTable,
input_size=[(5, 7), (5, 7)]), input_size=[(5, 7), (5, 7)],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Square, OldModuleTest(nn.Square,
input_size=(10, 2, 4), input_size=(10, 2, 4),
reference_fn=lambda i, _: i.mul(i)), reference_fn=lambda i, _: i.mul(i)),
@ -192,31 +214,37 @@ tests = [
OldModuleTest(nn.Sum, OldModuleTest(nn.Sum,
constructor_args=(1,), constructor_args=(1,),
input_size=(2, 4, 5), input_size=(2, 4, 5),
reference_fn=lambda i, _: i.sum(1, keepdim=False)), reference_fn=lambda i, _: i.sum(1, keepdim=False),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Sum, OldModuleTest(nn.Sum,
constructor_args=(1, True), constructor_args=(1, True),
input_size=(2, 4, 5), input_size=(2, 4, 5),
reference_fn=lambda i, _: i.sum(1, keepdim=False).div(i.size(1)), reference_fn=lambda i, _: i.sum(1, keepdim=False).div(i.size(1)),
desc='sizeAverage'), desc='sizeAverage',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Mean, OldModuleTest(nn.Mean,
constructor_args=(1,), constructor_args=(1,),
input_size=(2, 4, 5), input_size=(2, 4, 5),
reference_fn=lambda i, _: torch.mean(i, 1, keepdim=False)), reference_fn=lambda i, _: torch.mean(i, 1, keepdim=False),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(lambda: nn.Sequential().add(nn.GradientReversal()).add(nn.GradientReversal()), OldModuleTest(lambda: nn.Sequential().add(nn.GradientReversal()).add(nn.GradientReversal()),
input_size=(4, 3, 2, 2), input_size=(4, 3, 2, 2),
fullname='GradientReversal'), fullname='GradientReversal',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Identity, OldModuleTest(nn.Identity,
input_size=(4, 3, 2, 4), input_size=(4, 3, 2, 4),
reference_fn=lambda i, _: i), reference_fn=lambda i, _: i),
OldModuleTest(nn.DotProduct, OldModuleTest(nn.DotProduct,
input_size=[(10, 4), (10, 4)], input_size=[(10, 4), (10, 4)],
reference_fn=lambda i, _: torch.Tensor(list( reference_fn=lambda i, _: torch.Tensor(list(
a.dot(b) for a, b in zip(i[0], i[1]))) a.dot(b) for a, b in zip(i[0], i[1]))),
test_cuda=(not TEST_WITH_ROCM)
), ),
OldModuleTest(nn.CosineDistance, OldModuleTest(nn.CosineDistance,
input_size=[(10, 4), (10, 4)], input_size=[(10, 4), (10, 4)],
reference_fn=lambda i, _: torch.Tensor(list( reference_fn=lambda i, _: torch.Tensor(list(
a.dot(b) / (a.norm(2) * b.norm(2)) for a, b in zip(i[0], i[1]))) a.dot(b) / (a.norm(2) * b.norm(2)) for a, b in zip(i[0], i[1]))),
test_cuda=(not TEST_WITH_ROCM)
), ),
OldModuleTest(nn.JoinTable, OldModuleTest(nn.JoinTable,
constructor_args=(0,), constructor_args=(0,),
@ -256,19 +284,23 @@ tests = [
reference_fn=lambda i, _: torch.min(i, 1, False)[0], reference_fn=lambda i, _: torch.min(i, 1, False)[0],
desc='with_dimension'), desc='with_dimension'),
OldModuleTest(nn.MixtureTable, OldModuleTest(nn.MixtureTable,
input_size=[(5, 3), (5, 3, 6)]), input_size=[(5, 3), (5, 3, 6)],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.LookupTable, OldModuleTest(nn.LookupTable,
constructor_args=(4, 3), constructor_args=(4, 3),
input_fn=lambda: torch.randperm(2).repeat(1, 2), input_fn=lambda: torch.randperm(2).repeat(1, 2),
jacobian_input=False), jacobian_input=False,
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Mul, OldModuleTest(nn.Mul,
input_size=(2, 3, 4, 2), input_size=(2, 3, 4, 2),
reference_fn=lambda i, p: i * p[0][0]), reference_fn=lambda i, p: i * p[0][0],
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.MulConstant, OldModuleTest(nn.MulConstant,
constructor_args=(4,), constructor_args=(4,),
input_size=(2, 3, 4, 2), input_size=(2, 3, 4, 2),
reference_fn=lambda i, _: i * 4, reference_fn=lambda i, _: i * 4,
check_inplace=True), check_inplace=True,
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Narrow, OldModuleTest(nn.Narrow,
constructor_args=(0, 0), constructor_args=(0, 0),
input_size=(2, 3, 4, 2), input_size=(2, 3, 4, 2),
@ -291,7 +323,8 @@ tests = [
OldModuleTest(nn.Replicate, OldModuleTest(nn.Replicate,
constructor_args=(2, 1), constructor_args=(2, 1),
input_size=(10, 3, 4, 5), input_size=(10, 3, 4, 5),
reference_fn=lambda i, _: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5)), reference_fn=lambda i, _: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Padding, OldModuleTest(nn.Padding,
constructor_args=(0, 2, -10), constructor_args=(0, 2, -10),
input_size=(2, 3, 4, 5)), input_size=(2, 3, 4, 5)),
@ -305,17 +338,21 @@ tests = [
desc='negative_pad'), desc='negative_pad'),
OldModuleTest(nn.PartialLinear, OldModuleTest(nn.PartialLinear,
constructor_args=(5, 6), constructor_args=(5, 6),
input_size=(4, 5)), input_size=(4, 5),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(lambda: nn.PartialLinear(5, 6).setPartition(torch.Tensor((2, 4))), OldModuleTest(lambda: nn.PartialLinear(5, 6).setPartition(torch.Tensor((2, 4))),
input_size=(4, 5), input_size=(4, 5),
fullname='PartialLinear_setPartition'), fullname='PartialLinear_setPartition',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Power, OldModuleTest(nn.Power,
constructor_args=(2,), constructor_args=(2,),
input_size=(2, 3, 4, 5)), input_size=(2, 3, 4, 5),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Power, OldModuleTest(nn.Power,
constructor_args=(1.5,), constructor_args=(1.5,),
input_fn=lambda: torch.rand(3, 4, 5), input_fn=lambda: torch.rand(3, 4, 5),
desc='fractional'), desc='fractional',
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.Reshape, OldModuleTest(nn.Reshape,
constructor_args=(4, 5), constructor_args=(4, 5),
input_size=(3, 4 * 5), input_size=(3, 4 * 5),
@ -375,10 +412,12 @@ tests = [
desc='stride_pad'), desc='stride_pad'),
OldModuleTest(nn.SpatialDivisiveNormalization, OldModuleTest(nn.SpatialDivisiveNormalization,
constructor_args=(3,), constructor_args=(3,),
input_size=(2, 3, 8, 8)), input_size=(2, 3, 8, 8),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialContrastiveNormalization, OldModuleTest(nn.SpatialContrastiveNormalization,
constructor_args=(3,), constructor_args=(3,),
input_size=(2, 3, 8, 8)), input_size=(2, 3, 8, 8),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialDilatedConvolution, OldModuleTest(nn.SpatialDilatedConvolution,
constructor_args=(3, 2, 3, 3, 2, 2, 1, 1, 2, 2), constructor_args=(3, 2, 3, 3, 2, 2, 1, 1, 2, 2),
input_size=(2, 3, 8, 8)), input_size=(2, 3, 8, 8)),
@ -436,13 +475,15 @@ tests = [
input_size=(1, 3, 7, 7)), input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialLPPooling, OldModuleTest(nn.SpatialLPPooling,
constructor_args=(3, 2, 2, 2, 2, 2), constructor_args=(3, 2, 2, 2, 2, 2),
input_size=(1, 3, 7, 7)), input_size=(1, 3, 7, 7),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialSubSampling, OldModuleTest(nn.SpatialSubSampling,
constructor_args=(3, 3, 3, 2, 2), constructor_args=(3, 3, 3, 2, 2),
input_size=(1, 3, 7, 7)), input_size=(1, 3, 7, 7)),
OldModuleTest(nn.SpatialSubtractiveNormalization, OldModuleTest(nn.SpatialSubtractiveNormalization,
constructor_args=(3,), constructor_args=(3,),
input_size=(1, 3, 7, 7)), input_size=(1, 3, 7, 7),
test_cuda=(not TEST_WITH_ROCM)),
OldModuleTest(nn.SpatialSubtractiveNormalization, OldModuleTest(nn.SpatialSubtractiveNormalization,
constructor_args=(3, torch.rand(3)), constructor_args=(3, torch.rand(3)),
input_size=(1, 3, 7, 7), input_size=(1, 3, 7, 7),
@ -521,7 +562,8 @@ tests = [
CriterionTest(nn.WeightedMSECriterion, CriterionTest(nn.WeightedMSECriterion,
constructor_args_fn=lambda: (torch.rand(3, 4, 5),), constructor_args_fn=lambda: (torch.rand(3, 4, 5),),
input_size=(2, 3, 4, 5), input_size=(2, 3, 4, 5),
target_size=(2, 3, 4, 5)), target_size=(2, 3, 4, 5),
test_cuda=(not TEST_WITH_ROCM)),
CriterionTest(nn.MarginCriterion, CriterionTest(nn.MarginCriterion,
input_size=(5, 10), input_size=(5, 10),
target_fn=lambda: torch.randn(5, 10).sign()), target_fn=lambda: torch.randn(5, 10).sign()),
@ -544,14 +586,16 @@ for p in (1, 2, 1.5):
input_size=(4, 5), input_size=(4, 5),
# Eh, we need to use p as a default, so it's passed by value # Eh, we need to use p as a default, so it's passed by value
reference_fn=lambda i, _, p=p: i.div(i.norm(p, 1, True).expand_as(i)), reference_fn=lambda i, _, p=p: i.div(i.norm(p, 1, True).expand_as(i)),
desc=str(p)), desc=str(p),
test_cuda=(not TEST_WITH_ROCM)),
) )
for p in range(1, 4 + 1): for p in range(1, 4 + 1):
tests.append( tests.append(
OldModuleTest(nn.PairwiseDistance, OldModuleTest(nn.PairwiseDistance,
constructor_args=(p,), constructor_args=(p,),
input_size=[(4, 10), (4, 10)], input_size=[(4, 10), (4, 10)],
desc=str(p)) desc=str(p),
test_cuda=(not TEST_WITH_ROCM))
) )
@ -613,6 +657,10 @@ def prepare_tests():
'KLDivLoss': 'DistKLDivCriterion', 'KLDivLoss': 'DistKLDivCriterion',
} }
for test in tests: for test in tests:
name = test.get_name()
if ((name == "test_Max" or name == "test_Min" or name == "test_Max_with_dimension" or
name == "test_Min_with_dimension") and TEST_WITH_ROCM):
continue
add_test(test) add_test(test)
for test_params in module_tests: for test_params in module_tests:
test_params = deepcopy(test_params) test_params = deepcopy(test_params)

File diff suppressed because it is too large

View File

@ -3444,7 +3444,6 @@ class TestTorch(TestCase):
self.assertRaises(TypeError, lambda: q.topk(4, True)) self.assertRaises(TypeError, lambda: q.topk(4, True))
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA') @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
@skipIfRocm
def test_topk_noncontiguous_gpu(self): def test_topk_noncontiguous_gpu(self):
t = torch.randn(20, device="cuda")[::2] t = torch.randn(20, device="cuda")[::2]
top1, idx1 = t.topk(5) top1, idx1 = t.topk(5)

View File

@ -75,6 +75,7 @@
"struct curandStateMtgp32*": "curandStateMtgp32*", "struct curandStateMtgp32*": "curandStateMtgp32*",
"__host__ void THCRandom_getRNGState": "extern \"C\" __host__ void THCRandom_getRNGState", "__host__ void THCRandom_getRNGState": "extern \"C\" __host__ void THCRandom_getRNGState",
"__host__ void THCRandom_setRNGState": "extern \"C\" __host__ void THCRandom_setRNGState", "__host__ void THCRandom_setRNGState": "extern \"C\" __host__ void THCRandom_setRNGState",
"state[threadIdx.x].k = kernel;" : "state[threadIdx.x].set_params(kernel);"
} }
}, },
{ {
@ -96,27 +97,6 @@
"struct mtgp32_kernel_params": "mtgp32_kernel_params" "struct mtgp32_kernel_params": "mtgp32_kernel_params"
} }
}, },
{
"path": "aten/src/ATen/native/cuda/CuFFTUtils.h",
"s_constants": {
"#include <cufft.h>": "",
"#include <cufftXt.h>": ""
}
},
{
"path": "aten/src/ATen/native/cuda/CuFFTPlanCache.h",
"s_constants": {
"#include <cufft.h>": "",
"#include <cufftXt.h>": ""
}
},
{
"path": "aten/src/ATen/native/cuda/SpectralOps.cu",
"s_constants": {
"#include <cufft.h>": "",
"#include <cufftXt.h>": ""
}
},
{ {
"path": "aten/src/ATen/native/cuda/RoiPooling.cu", "path": "aten/src/ATen/native/cuda/RoiPooling.cu",
"s_constants": { "s_constants": {
@ -141,9 +121,6 @@
} }
], ],
"disabled_modules": [ "disabled_modules": [
"aten/src/ATen/native/cuda/CuFFTUtils.h",
"aten/src/ATen/native/cuda/CuFFTPlanCache.h",
"aten/src/ATen/native/cuda/SpectralOps.cu",
], ],
"disabled_functions": [ "disabled_functions": [
{ {
@ -205,13 +182,6 @@
"functions": [ "functions": [
"THCTensor_(getTextureObject)" "THCTensor_(getTextureObject)"
] ]
},
{
"path": "aten/src/THC/THCTensorRandom.cu",
"functions": [
"THCRandom_setRNGState",
"set_rngstate_kernel"
]
} }
] ]
} }

View File

@ -50,7 +50,8 @@ API_BLAS = 39
API_SPARSE = 40 API_SPARSE = 40
API_RAND = 41 API_RAND = 41
API_LAST = 42 API_LAST = 42
API_FFT = 43
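# note: API_FFT and HIP_UNSUPPORTED currently share the value 43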
HIP_UNSUPPORTED = 43 HIP_UNSUPPORTED = 43
API_PYTORCH = 1337 API_PYTORCH = 1337
API_CAFFE2 = 1338 API_CAFFE2 = 1338

View File

@ -271,8 +271,8 @@ CUDA_INCLUDE_MAP = {
"curand_precalc.h": ("hiprand_kernel.h", CONV_INCLUDE, API_RAND), "curand_precalc.h": ("hiprand_kernel.h", CONV_INCLUDE, API_RAND),
"curand_uniform.h": ("hiprand_kernel.h", CONV_INCLUDE, API_RAND), "curand_uniform.h": ("hiprand_kernel.h", CONV_INCLUDE, API_RAND),
"cusparse.h": ("hipsparse.h", CONV_INCLUDE, API_RAND), "cusparse.h": ("hipsparse.h", CONV_INCLUDE, API_RAND),
"#include <cufft.h>": ("", CONV_INCLUDE, API_RAND, HIP_UNSUPPORTED), "cufft.h": ("hipfft.h", CONV_INCLUDE, API_BLAS),
"#include <cufftXt.h>": ("", CONV_INCLUDE, API_RAND, HIP_UNSUPPORTED), "cufftXt.h": ("hipfft.h", CONV_INCLUDE, API_BLAS),
"#include <nvfunctional>": ("", CONV_INCLUDE, API_RAND, HIP_UNSUPPORTED), "#include <nvfunctional>": ("", CONV_INCLUDE, API_RAND, HIP_UNSUPPORTED),
} }
@ -2095,7 +2095,77 @@ CUDA_IDENTIFIER_MAP = {
"curand_poisson": ("hiprand_poisson", CONV_DEVICE_FUNC, API_RAND), "curand_poisson": ("hiprand_poisson", CONV_DEVICE_FUNC, API_RAND),
"curand_poisson4": ("hiprand_poisson4", CONV_DEVICE_FUNC, API_RAND), "curand_poisson4": ("hiprand_poisson4", CONV_DEVICE_FUNC, API_RAND),
"curand_Philox4x32_10": ("hiprand_Philox4x32_10", CONV_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED), "curand_Philox4x32_10": ("hiprand_Philox4x32_10", CONV_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED),
"mtgp32_kernel_params": ("mtgp32_kernel_params_t", CONV_MATH_FUNC, API_RAND) "mtgp32_kernel_params": ("mtgp32_kernel_params_t", CONV_MATH_FUNC, API_RAND),
"CUFFT_FORWARD": ("HIPFFT_FORWARD", CONV_NUMERIC_LITERAL, API_BLAS),
"CUFFT_INVERSE": ("HIPFFT_BACKWARD", CONV_NUMERIC_LITERAL, API_BLAS),
"CUFFT_COMPATIBILITY_DEFAULT": ("HIPFFT_COMPATIBILITY_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED),
"cufftResult_t": ("hipfftResult_t", CONV_TYPE, API_FFT),
"cufftResult": ("hipfftResult", CONV_TYPE, API_FFT),
"CUFFT_SUCCESS": ("HIPFFT_SUCCESS", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INVALID_PLAN": ("HIPFFT_INVALID_PLAN", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_ALLOC_FAILED": ("HIPFFT_ALLOC_FAILED", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INVALID_TYPE": ("HIPFFT_INVALID_TYPE", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INVALID_VALUE": ("HIPFFT_INVALID_VALUE", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INTERNAL_ERROR": ("HIPFFT_INTERNAL_ERROR", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_EXEC_FAILED": ("HIPFFT_EXEC_FAILED", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_SETUP_FAILED": ("HIPFFT_SETUP_FAILED", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INVALID_SIZE": ("HIPFFT_INVALID_SIZE", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_UNALIGNED_DATA": ("HIPFFT_UNALIGNED_DATA", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INCOMPLETE_PARAMETER_LIST": ("HIPFFT_INCOMPLETE_PARAMETER_LIST", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_INVALID_DEVICE": ("HIPFFT_INVALID_DEVICE", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_PARSE_ERROR": ("HIPFFT_PARSE_ERROR", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_NO_WORKSPACE": ("HIPFFT_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_NOT_IMPLEMENTED": ("HIPFFT_NOT_IMPLEMENTED", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_LICENSE_ERROR": ("HIPFFT_LICENSE_ERROR", CONV_NUMERIC_LITERAL, API_FFT, HIP_UNSUPPORTED),
"CUFFT_NOT_SUPPORTED": ("HIPFFT_NOT_SUPPORTED", CONV_NUMERIC_LITERAL, API_FFT),
"cufftType_t": ("hipfftType_t", CONV_TYPE, API_FFT),
"cufftType": ("hipfftType", CONV_TYPE, API_FFT),
"CUFFT_R2C": ("HIPFFT_R2C", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_C2R": ("HIPFFT_C2R", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_C2C": ("HIPFFT_C2C", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_D2Z": ("HIPFFT_D2Z", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_Z2D": ("HIPFFT_Z2D", CONV_NUMERIC_LITERAL, API_FFT),
"CUFFT_Z2Z": ("HIPFFT_Z2Z", CONV_NUMERIC_LITERAL, API_FFT),
"cufftCompatibility_t": ("hipfftCompatibility_t", CONV_TYPE, API_FFT, HIP_UNSUPPORTED),
"cufftCompatibility": ("hipfftCompatibility", CONV_TYPE, API_FFT, HIP_UNSUPPORTED),
"CUFFT_COMPATIBILITY_FFTW_PADDING": ("HIPFFT_COMPATIBILITY_FFTW_PADDING", CONV_NUMERIC_LITERAL, API_FFT, HIP_UNSUPPORTED),
"cufftReal": ("hipfftReal", CONV_TYPE, API_FFT),
"cufftDoubleReal": ("hipfftDoubleReal", CONV_TYPE, API_FFT),
"cufftComplex": ("hipfftComplex", CONV_TYPE, API_FFT),
"cufftDoubleComplex": ("hipfftDoubleComplex", CONV_TYPE, API_FFT),
"cufftHandle": ("hipfftHandle", CONV_TYPE, API_FFT),
"cufftPlan1d": ("hipfftPlan1d", CONV_MATH_FUNC, API_FFT),
"cufftPlan2d": ("hipfftPlan2d", CONV_MATH_FUNC, API_FFT),
"cufftPlan3d": ("hipfftPlan3d", CONV_MATH_FUNC, API_FFT),
"cufftPlanMany": ("hipfftPlanMany", CONV_MATH_FUNC, API_FFT),
"cufftMakePlan1d": ("hipfftMakePlan1d", CONV_MATH_FUNC, API_FFT),
"cufftMakePlan2d": ("hipfftMakePlan2d", CONV_MATH_FUNC, API_FFT),
"cufftMakePlan3d": ("hipfftMakePlan3d", CONV_MATH_FUNC, API_FFT),
"cufftMakePlanMany": ("hipfftMakePlanMany", CONV_MATH_FUNC, API_FFT),
"cufftMakePlanMany64": ("hipfftMakePlanMany64", CONV_MATH_FUNC, API_FFT),
"cufftGetSizeMany64": ("hipfftGetSizeMany64", CONV_MATH_FUNC, API_FFT),
"cufftEstimate1d": ("hipfftEstimate1d", CONV_MATH_FUNC, API_FFT),
"cufftEstimate2d": ("hipfftEstimate2d", CONV_MATH_FUNC, API_FFT),
"cufftEstimate3d": ("hipfftEstimate3d", CONV_MATH_FUNC, API_FFT),
"cufftEstimateMany": ("hipfftEstimateMany", CONV_MATH_FUNC, API_FFT),
"cufftCreate": ("hipfftCreate", CONV_MATH_FUNC, API_FFT),
"cufftGetSize1d": ("hipfftGetSize1d", CONV_MATH_FUNC, API_FFT),
"cufftGetSize2d": ("hipfftGetSize2d", CONV_MATH_FUNC, API_FFT),
"cufftGetSize3d": ("hipfftGetSize3d", CONV_MATH_FUNC, API_FFT),
"cufftGetSizeMany": ("hipfftGetSizeMany", CONV_MATH_FUNC, API_FFT),
"cufftGetSize": ("hipfftGetSize", CONV_MATH_FUNC, API_FFT),
"cufftSetWorkArea": ("hipfftSetWorkArea", CONV_MATH_FUNC, API_FFT),
"cufftSetAutoAllocation": ("hipfftSetAutoAllocation", CONV_MATH_FUNC, API_FFT),
"cufftExecC2C": ("hipfftExecC2C", CONV_MATH_FUNC, API_FFT),
"cufftExecR2C": ("hipfftExecR2C", CONV_MATH_FUNC, API_FFT),
"cufftExecC2R": ("hipfftExecC2R", CONV_MATH_FUNC, API_FFT),
"cufftExecZ2Z": ("hipfftExecZ2Z", CONV_MATH_FUNC, API_FFT),
"cufftExecD2Z": ("hipfftExecD2Z", CONV_MATH_FUNC, API_FFT),
"cufftExecZ2D": ("hipfftExecZ2D", CONV_MATH_FUNC, API_FFT),
"cufftSetStream": ("hipfftSetStream", CONV_MATH_FUNC, API_FFT),
"cufftDestroy": ("hipfftDestroy", CONV_MATH_FUNC, API_FFT),
"cufftGetVersion": ("hipfftGetVersion", CONV_MATH_FUNC, API_FFT),
"cufftGetProperty": ("hipfftGetProperty", CONV_MATH_FUNC, API_FFT, HIP_UNSUPPORTED),
} }
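
The entries added above let hipify translate cuFFT usage in the CUDA sources into hipFFT. A minimal sketch of the effect (the real pass matches whole identifiers with regexes, not plain substring replacement as below):

    # Hypothetical one-liner illustrating the table above, not the actual pass.
    src = "cufftExecC2C(plan, idata, odata, CUFFT_FORWARD);"
    for cuda_name, mapping in CUDA_IDENTIFIER_MAP.items():
        src = src.replace(cuda_name, mapping[0])
    # src is now "hipfftExecC2C(plan, idata, odata, HIPFFT_FORWARD);"
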
CUDA_SPARSE_MAP = { CUDA_SPARSE_MAP = {

View File

@ -760,8 +760,8 @@ def preprocessor(filepath, stats, hipify_caffe2):
output_source = processKernelLaunches(output_source, stats) output_source = processKernelLaunches(output_source, stats)
# Disable asserts # Disable asserts
if not filepath.endswith("THCGeneral.h.in"): # if not filepath.endswith("THCGeneral.h.in"):
output_source = disable_asserts(output_source) # output_source = disable_asserts(output_source)
# Replace std:: with non-std:: versions # Replace std:: with non-std:: versions
output_source = replace_math_functions(output_source) output_source = replace_math_functions(output_source)