mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Revert D18632773: Split libtorch.so back into libtorch_{cpu,cuda,hip}
Test Plan: revert-hammer
Differential Revision: D18632773
Original commit changeset: ea717c81e0d7
fbshipit-source-id: 18601439f9f81c9f389020e5a0e4e04adb21772d
This commit is contained in:
parent
eff4c4d7c1
commit
352731bd6e
|
|
@ -14,7 +14,7 @@ mkdir -p ${ZIP_DIR}/src
|
|||
cp -R ${ARTIFACTS_DIR}/arm64/include ${ZIP_DIR}/install/
|
||||
# build a FAT binary
|
||||
cd ${ZIP_DIR}/install/lib
|
||||
target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a)
|
||||
target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpytorch_qnnpack.a libtorch.a)
|
||||
for lib in ${target_libs[*]}
|
||||
do
|
||||
libs=(${ARTIFACTS_DIR}/x86_64/lib/${lib} ${ARTIFACTS_DIR}/arm64/lib/${lib})
|
||||
|
|
|
|||
|
|
@ -72,7 +72,6 @@ if (ANDROID_ABI)
|
|||
endfunction(import_static_lib)
|
||||
|
||||
import_static_lib(libtorch)
|
||||
import_static_lib(libtorch_cpu)
|
||||
import_static_lib(libc10)
|
||||
import_static_lib(libnnpack)
|
||||
import_static_lib(libpytorch_qnnpack)
|
||||
|
|
@ -86,7 +85,6 @@ if (ANDROID_ABI)
|
|||
-Wl,--gc-sections
|
||||
-Wl,--whole-archive
|
||||
libtorch
|
||||
libtorch_cpu
|
||||
-Wl,--no-whole-archive
|
||||
libc10
|
||||
libnnpack
|
||||
|
|
@ -104,7 +102,6 @@ else()
|
|||
-Wl,--gc-sections
|
||||
-Wl,--whole-archive
|
||||
torch
|
||||
torch_cpu
|
||||
-Wl,--no-whole-archive
|
||||
c10
|
||||
nnpack
|
||||
|
|
|
|||
|
|
@ -2,11 +2,9 @@
|
|||
|
||||
#include <ATen/core/Generator.h>
|
||||
|
||||
// TODO: this file should be in ATen/cuda, not top level
|
||||
|
||||
namespace at {
|
||||
|
||||
struct TORCH_CUDA_API CUDAGenerator : public Generator {
|
||||
struct CAFFE2_API CUDAGenerator : public Generator {
|
||||
// Constructors
|
||||
CUDAGenerator(DeviceIndex device_index = -1);
|
||||
~CUDAGenerator() = default;
|
||||
|
|
@ -30,8 +28,8 @@ private:
|
|||
namespace cuda {
|
||||
namespace detail {
|
||||
|
||||
TORCH_CUDA_API CUDAGenerator* getDefaultCUDAGenerator(DeviceIndex device_index = -1);
|
||||
TORCH_CUDA_API std::shared_ptr<CUDAGenerator> createCUDAGenerator(DeviceIndex device_index = -1);
|
||||
CAFFE2_API CUDAGenerator* getDefaultCUDAGenerator(DeviceIndex device_index = -1);
|
||||
CAFFE2_API std::shared_ptr<CUDAGenerator> createCUDAGenerator(DeviceIndex device_index = -1);
|
||||
|
||||
} // namespace detail
|
||||
} // namespace cuda
|
||||
|
|
|
|||
|
|
@ -93,71 +93,71 @@ CAFFE2_API Tensor& propagate_names(
|
|||
CAFFE2_API void propagate_names(Tensor& result, const Tensor& src);
|
||||
|
||||
// Propagates all names except for those at the excluded_idxs.
|
||||
CAFFE2_API void propagate_names_except(Tensor& result, const Tensor& src, IntArrayRef excluded_idxs);
|
||||
void propagate_names_except(Tensor& result, const Tensor& src, IntArrayRef excluded_idxs);
|
||||
|
||||
// Used for reduction ops that have a `keepdim` arg.
|
||||
CAFFE2_API void propagate_names_for_reduction(Tensor& result, const Tensor& src, IntArrayRef excluded_idxs, bool keepdim);
|
||||
void propagate_names_for_reduction(Tensor& result, const Tensor& src, IntArrayRef excluded_idxs, bool keepdim);
|
||||
|
||||
CAFFE2_API void propagate_names_for_expand(Tensor& result, const Tensor& self);
|
||||
void propagate_names_for_expand(Tensor& result, const Tensor& self);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_cat_outnames(TensorList tensors);
|
||||
std::vector<Dimname> compute_cat_outnames(TensorList tensors);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_broadcast_outnames(
|
||||
std::vector<Dimname> compute_broadcast_outnames(
|
||||
const Tensor& self,
|
||||
const Tensor& other);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> broadcast_to_outnames(
|
||||
std::vector<Dimname> broadcast_to_outnames(
|
||||
const Tensor& tensor,
|
||||
const Tensor& reference_tensor,
|
||||
const char* op_name);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_matmul_outnames(const Tensor& self, const Tensor& other);
|
||||
std::vector<Dimname> compute_matmul_outnames(const Tensor& self, const Tensor& other);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_cdist_outnames(const Tensor& self, const Tensor& other);
|
||||
std::vector<Dimname> compute_cdist_outnames(const Tensor& self, const Tensor& other);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_bmm_outnames(
|
||||
std::vector<Dimname> compute_bmm_outnames(
|
||||
Tensor& result,
|
||||
const Tensor& self,
|
||||
const Tensor& other);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_squeeze_outnames(const Tensor& tensor);
|
||||
std::vector<Dimname> compute_squeeze_outnames(const Tensor& tensor);
|
||||
|
||||
// TensorImpl* overloads for Legacy TH/THC code. Use these sparingly.
|
||||
|
||||
CAFFE2_API TensorImpl* propagate_names_if_nonempty(
|
||||
TensorImpl* propagate_names_if_nonempty(
|
||||
TensorImpl* result,
|
||||
DimnameList maybe_names,
|
||||
bool validate_names = false);
|
||||
|
||||
CAFFE2_API TensorImpl* propagate_names(
|
||||
TensorImpl* propagate_names(
|
||||
TensorImpl* result,
|
||||
DimnameList names,
|
||||
bool validate_names = false);
|
||||
|
||||
CAFFE2_API void propagate_names(TensorImpl* result, /*const */TensorImpl* src);
|
||||
void propagate_names(TensorImpl* result, /*const */TensorImpl* src);
|
||||
|
||||
// result = m1 @ m2 + bias
|
||||
CAFFE2_API void propagate_names_for_addmm(
|
||||
void propagate_names_for_addmm(
|
||||
TensorImpl* result,
|
||||
/*const*/TensorImpl* m1,
|
||||
/*const*/TensorImpl* m2,
|
||||
/*const*/TensorImpl* bias);
|
||||
|
||||
CAFFE2_API void propagate_names_for_addmv(
|
||||
void propagate_names_for_addmv(
|
||||
TensorImpl* result,
|
||||
TensorImpl* mat,
|
||||
TensorImpl* vec,
|
||||
TensorImpl* bias);
|
||||
|
||||
CAFFE2_API void check_names_for_dot(TensorImpl* vec1, TensorImpl* vec2);
|
||||
void check_names_for_dot(TensorImpl* vec1, TensorImpl* vec2);
|
||||
|
||||
CAFFE2_API std::vector<Dimname> compute_baddbmm_outnames(
|
||||
std::vector<Dimname> compute_baddbmm_outnames(
|
||||
TensorImpl* result,
|
||||
TensorImpl* self,
|
||||
TensorImpl* other,
|
||||
TensorImpl* bias);
|
||||
|
||||
CAFFE2_API bool are_names_equal(TensorImpl* self, TensorImpl* other);
|
||||
bool are_names_equal(TensorImpl* self, TensorImpl* other);
|
||||
|
||||
} // namespace namedinference
|
||||
|
||||
|
|
|
|||
|
|
@ -359,7 +359,6 @@ DEFINE_DISPATCH(sinh_stub);
|
|||
DEFINE_DISPATCH(sqrt_stub);
|
||||
DEFINE_DISPATCH(tan_stub);
|
||||
DEFINE_DISPATCH(tanh_stub);
|
||||
DEFINE_DISPATCH(trigamma_stub);
|
||||
DEFINE_DISPATCH(trunc_stub);
|
||||
DEFINE_DISPATCH(lgamma_stub);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
#include <ATen/native/TensorIterator.h>
|
||||
#include <ATen/native/cpu/Loops.h>
|
||||
#include <ATen/core/DistributionsHelper.h>
|
||||
#include <ATen/native/UnaryOps.h>
|
||||
|
||||
namespace at {
|
||||
namespace native {
|
||||
|
|
@ -130,14 +129,15 @@ void multinomial_apply(Tensor& result, const Tensor& self, const int64_t n_sampl
|
|||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
static void multinomial_kernel_impl(Tensor& result, const Tensor& self, const int64_t n_sample, const bool with_replacement, Generator *gen) {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.scalar_type(), "multinomial", [&] {
|
||||
multinomial_apply<scalar_t>(result, self, n_sample, with_replacement, gen);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
REGISTER_DISPATCH(multinomial_stub, &multinomial_kernel_impl);
|
||||
|
||||
}} // namespace at::native
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -331,47 +331,19 @@ void GeluBackwardCUDAKernelImpl(TensorIterator& it) {
|
|||
Tensor gelu_cuda(const Tensor& self) {
|
||||
Tensor Y = at::native::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
|
||||
auto it = TensorIterator::unary_op(Y, self);
|
||||
GeluCUDAKernelImpl(it);
|
||||
GeluKernel(kCUDA, it);
|
||||
return Y;
|
||||
}
|
||||
|
||||
Tensor gelu_backward_cuda(const Tensor& grad, const Tensor& self) {
|
||||
Tensor dX = at::native::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
|
||||
auto it = TensorIterator::binary_op(dX, grad, self);
|
||||
GeluBackwardCUDAKernelImpl(it);
|
||||
GeluBackwardKernel(kCUDA, it);
|
||||
return dX;
|
||||
}
|
||||
|
||||
// computes `result = self <= threshold ? value : other`
|
||||
// other is `self` in threshold() and `grad` in threshold_backward()
|
||||
static Tensor threshold_out_cuda(
|
||||
optional<Tensor> opt_result,
|
||||
const Tensor& self,
|
||||
Scalar threshold,
|
||||
Scalar value,
|
||||
const Tensor& other) {
|
||||
Tensor result = opt_result.value_or(Tensor());
|
||||
auto iter = TensorIterator::binary_op(result, self, other);
|
||||
threshold_kernel(iter, threshold, value);
|
||||
return iter.output();
|
||||
}
|
||||
|
||||
Tensor threshold_cuda(const Tensor& self, Scalar threshold, Scalar value) {
|
||||
return threshold_out_cuda(nullopt, self, threshold, value, self);
|
||||
}
|
||||
|
||||
Tensor& threshold__cuda(Tensor& self, Scalar threshold, Scalar value) {
|
||||
threshold_out_cuda(make_optional(self), self, threshold, value, self);
|
||||
return self;
|
||||
}
|
||||
|
||||
Tensor& threshold_out_cuda(Tensor& result, const Tensor& self, Scalar threshold, Scalar value) {
|
||||
threshold_out_cuda(make_optional(result), self, threshold, value, self);
|
||||
return result;
|
||||
}
|
||||
|
||||
Tensor threshold_backward_cuda(const Tensor& grad, const Tensor& self, Scalar threshold) {
|
||||
return threshold_out_cuda(nullopt, self, threshold, 0, grad);
|
||||
}
|
||||
REGISTER_DISPATCH(threshold_stub, &threshold_kernel);
|
||||
REGISTER_DISPATCH(GeluKernel, &GeluCUDAKernelImpl);
|
||||
REGISTER_DISPATCH(GeluBackwardKernel, &GeluBackwardCUDAKernelImpl);
|
||||
|
||||
}} // namespace at::native
|
||||
|
|
|
|||
|
|
@ -2627,29 +2627,17 @@
|
|||
use_c10_dispatcher: full
|
||||
variants: function
|
||||
supports_named_tensor: True
|
||||
dispatch:
|
||||
CPU: threshold
|
||||
CUDA: threshold_cuda
|
||||
|
||||
- func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
|
||||
variants: function
|
||||
supports_named_tensor: True
|
||||
dispatch:
|
||||
CPU: threshold_
|
||||
CUDA: threshold__cuda
|
||||
|
||||
- func: threshold.out(Tensor self, Scalar threshold, Scalar value, *, Tensor(a!) out) -> Tensor(a!)
|
||||
supports_named_tensor: True
|
||||
dispatch:
|
||||
CPU: threshold_out
|
||||
CUDA: threshold_out_cuda
|
||||
|
||||
- func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
|
||||
use_c10_dispatcher: full
|
||||
variants: function
|
||||
dispatch:
|
||||
CPU: threshold_backward
|
||||
CUDA: threshold_backward_cuda
|
||||
|
||||
- func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
|
||||
variants: function, method
|
||||
|
|
|
|||
|
|
@ -11,11 +11,10 @@
|
|||
|
||||
namespace at {
|
||||
namespace native {
|
||||
namespace {
|
||||
|
||||
DEFINE_DISPATCH(qadaptive_avg_pool2d_nhwc_stub);
|
||||
|
||||
namespace {
|
||||
|
||||
inline int start_index(int out_idx, int out_len, int in_len) {
|
||||
/*
|
||||
* out_idx: the current index of output matrix
|
||||
|
|
|
|||
|
|
@ -15,11 +15,10 @@
|
|||
|
||||
namespace at {
|
||||
namespace native {
|
||||
namespace {
|
||||
|
||||
DEFINE_DISPATCH(qavg_pool2d_nhwc_stub);
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename scalar_t>
|
||||
static void avg_pool2d_out_frame(
|
||||
const Tensor& input,
|
||||
|
|
|
|||
|
|
@ -9,12 +9,11 @@
|
|||
|
||||
namespace at {
|
||||
namespace native {
|
||||
namespace {
|
||||
|
||||
DEFINE_DISPATCH(qcat_nhwc_stub);
|
||||
DEFINE_DISPATCH(qcat_relu_nhwc_stub);
|
||||
|
||||
namespace {
|
||||
|
||||
bool is_cat_nhwc_fast_path(const c10::List<Tensor>& qxs, int dim) {
|
||||
TORCH_CHECK(qxs.size() > 0);
|
||||
bool is_fast_path = dim == 1;
|
||||
|
|
|
|||
|
|
@ -16,11 +16,10 @@
|
|||
|
||||
namespace at {
|
||||
namespace native {
|
||||
namespace {
|
||||
|
||||
DEFINE_DISPATCH(qmaxpool_2d_nhwc_stub);
|
||||
|
||||
namespace {
|
||||
|
||||
/* Computes the spatial 2D max pooling with dilation.
|
||||
|
||||
Argument description in the argument list.
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
namespace at { namespace native {
|
||||
|
||||
TORCH_API sparse::SparseTensor& mul_out_sparse_scalar(sparse::SparseTensor& r, const sparse::SparseTensor& t, Scalar value);
|
||||
TORCH_API sparse::SparseTensor& mul_out_sparse_zerodim(sparse::SparseTensor& r, const sparse::SparseTensor& t, const Tensor& value);
|
||||
sparse::SparseTensor& mul_out_sparse_scalar(sparse::SparseTensor& r, const sparse::SparseTensor& t, Scalar value);
|
||||
sparse::SparseTensor& mul_out_sparse_zerodim(sparse::SparseTensor& r, const sparse::SparseTensor& t, const Tensor& value);
|
||||
|
||||
}}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ int THCStorage_getDevice(THCState* state, const THCStorage* storage) {
|
|||
return storage->device().index();
|
||||
}
|
||||
|
||||
THCStorage* THCStorage_new(
|
||||
THC_API THCStorage* THCStorage_new(
|
||||
THCState* state,
|
||||
caffe2::TypeMeta data_type) {
|
||||
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
|
||||
|
|
|
|||
|
|
@ -12,10 +12,7 @@
|
|||
#define MAX_NUM_BLOCKS 200
|
||||
#define BLOCK_SIZE 256
|
||||
|
||||
// NB: ROCm compiler seems to have a bug where __host__ functions must be
|
||||
// explicitly specified extern "C" otherwise ROCm compiler doesn't respect it.
|
||||
// See https://github.com/RadeonOpenCompute/hcc/issues/839
|
||||
extern "C" __host__ void THCRandom_getRNGState(at::Generator *gen_, THByteTensor *rng_state)
|
||||
THC_API __host__ void THCRandom_getRNGState(at::Generator *gen_, THByteTensor *rng_state)
|
||||
{
|
||||
auto gen = at::check_generator<at::CUDAGenerator>(gen_);
|
||||
std::lock_guard<std::mutex> lock(gen->mutex_);
|
||||
|
|
@ -40,7 +37,7 @@ extern "C" __host__ void THCRandom_getRNGState(at::Generator *gen_, THByteTensor
|
|||
memcpy(THByteTensor_data(rng_state) + states_size + seed_size, &offset, offset_size);
|
||||
}
|
||||
|
||||
extern "C" __host__ void THCRandom_setRNGState(at::Generator *gen_, THByteTensor *rng_state)
|
||||
THC_API __host__ void THCRandom_setRNGState(at::Generator *gen_, THByteTensor *rng_state)
|
||||
{
|
||||
auto gen = at::check_generator<at::CUDAGenerator>(gen_);
|
||||
std::lock_guard<std::mutex> lock(gen->mutex_);
|
||||
|
|
|
|||
|
|
@ -99,18 +99,12 @@
|
|||
#define CAFFE2_API C10_IMPORT
|
||||
#endif
|
||||
|
||||
// NB: For now, HIP is overloaded to use the same macro, but ideally
|
||||
// HIPify should translate TORCH_CUDA_API to TORCH_HIP_API
|
||||
#if defined(TORCH_CUDA_BUILD_MAIN_LIB) || defined(TORCH_HIP_BUILD_MAIN_LIB)
|
||||
// This one will eventually be used by libtorch_cuda.so, but for
|
||||
// now it has the same function as CAFFE2_API
|
||||
#ifdef CAFFE2_BUILD_MAIN_LIB
|
||||
#define TORCH_CUDA_API C10_EXPORT
|
||||
#else
|
||||
#define TORCH_CUDA_API C10_IMPORT
|
||||
#endif
|
||||
|
||||
#if defined(TORCH_HIP_BUILD_MAIN_LIB)
|
||||
#define TORCH_HIP_API C10_EXPORT
|
||||
#else
|
||||
#define TORCH_HIP_API C10_IMPORT
|
||||
#endif
|
||||
|
||||
#endif // C10_MACROS_MACROS_H_
|
||||
|
|
|
|||
|
|
@ -478,6 +478,11 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
)
|
||||
endif()
|
||||
|
||||
if (USE_NCCL)
|
||||
list(APPEND TORCH_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
|
||||
endif()
|
||||
|
||||
if (NOT INTERN_BUILD_MOBILE)
|
||||
list(APPEND TORCH_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
|
||||
|
|
@ -530,10 +535,6 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
target_link_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB})
|
||||
target_include_directories(caffe2_nvrtc PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
if (USE_NCCL)
|
||||
list(APPEND Caffe2_GPU_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (USE_ROCM)
|
||||
|
|
@ -543,10 +544,6 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
${TORCH_SRC_DIR}/csrc/autograd/functions/comm.cpp
|
||||
${TORCH_SRC_DIR}/csrc/cuda/comm.cpp
|
||||
)
|
||||
if (USE_NCCL)
|
||||
list(APPEND Caffe2_HIP_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
|
||||
endif()
|
||||
# caffe2_nvrtc's stubs to driver APIs are useful for HIP.
|
||||
# See NOTE [ ATen NVRTC Stub and HIP ]
|
||||
add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
|
||||
|
|
@ -619,52 +616,41 @@ endif()
|
|||
# ==========================================================
|
||||
|
||||
|
||||
add_library(torch_cpu ${Caffe2_CPU_SRCS})
|
||||
torch_compile_options(torch_cpu) # see cmake/public/utils.cmake
|
||||
|
||||
# This is required for older versions of CMake, which don't allow
|
||||
# specifying add_library() without a list of source files
|
||||
set(DUMMY_EMPTY_FILE ${CMAKE_BINARY_DIR}/empty.cpp)
|
||||
|
||||
if (MSVC)
|
||||
set(DUMMY_FILE_CONTENT "__declspec(dllexport) int ignore_this_library_placeholder(){return 0\\;}")
|
||||
else()
|
||||
set(DUMMY_FILE_CONTENT "")
|
||||
# Instead of separate .so libraries, GPU sources are now conditionally
|
||||
# compiled into the main torch.so library.
|
||||
if(USE_CUDA)
|
||||
list(APPEND Caffe2_CPU_SRCS ${Caffe2_GPU_SRCS})
|
||||
foreach(tmp ${Caffe2_GPU_SRCS})
|
||||
message(STATUS " " ${tmp})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
file(WRITE ${DUMMY_EMPTY_FILE} ${DUMMY_FILE_CONTENT})
|
||||
|
||||
# Wrapper library for people who link against torch and expect both CPU and CUDA support
|
||||
# Contains "torch_cpu" and "torch_cuda"
|
||||
add_library(torch ${DUMMY_EMPTY_FILE})
|
||||
|
||||
if(USE_ROCM)
|
||||
filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$")
|
||||
set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
|
||||
list(APPEND Caffe2_CPU_SRCS ${Caffe2_HIP_SRCS})
|
||||
endif()
|
||||
|
||||
|
||||
# Compile exposed libraries.
|
||||
IF (USE_ROCM)
|
||||
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
|
||||
hip_add_library(torch_hip ${Caffe2_HIP_SRCS})
|
||||
hip_add_library(torch ${Caffe2_CPU_SRCS})
|
||||
set(CUDA_LINK_LIBRARIES_KEYWORD)
|
||||
torch_compile_options(torch_hip) # see cmake/public/utils.cmake
|
||||
# TODO: Not totally sure if this is live or not
|
||||
if (USE_NCCL)
|
||||
target_link_libraries(torch_hip PRIVATE __caffe2_nccl)
|
||||
target_compile_definitions(torch_hip PRIVATE USE_NCCL)
|
||||
endif()
|
||||
ELSEIF(USE_CUDA)
|
||||
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
|
||||
cuda_add_library(torch_cuda ${Caffe2_GPU_SRCS})
|
||||
cuda_add_library(torch ${Caffe2_CPU_SRCS})
|
||||
set(CUDA_LINK_LIBRARIES_KEYWORD)
|
||||
torch_compile_options(torch_cuda) # see cmake/public/utils.cmake
|
||||
if (USE_NCCL)
|
||||
target_link_libraries(torch_cuda PRIVATE __caffe2_nccl)
|
||||
target_compile_definitions(torch_cuda PRIVATE USE_NCCL)
|
||||
endif()
|
||||
ELSE()
|
||||
add_library(torch ${Caffe2_CPU_SRCS})
|
||||
ENDIF()
|
||||
|
||||
if (USE_NCCL)
|
||||
target_link_libraries(torch PRIVATE __caffe2_nccl)
|
||||
target_compile_definitions(torch PRIVATE USE_NCCL)
|
||||
endif()
|
||||
|
||||
|
||||
# ==========================================================
|
||||
|
|
@ -676,12 +662,14 @@ if (NOT INTERN_BUILD_MOBILE)
|
|||
# Adding the generated header file to the ${TORCH_SRCS} list is not sufficient
|
||||
# to establish the dependency, since the generation procedure is declared in a different CMake file.
|
||||
# See https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
|
||||
add_dependencies(torch_cpu Caffe2_PROTO)
|
||||
add_dependencies(torch Caffe2_PROTO)
|
||||
endif()
|
||||
|
||||
torch_compile_options(torch) # see cmake/public/utils.cmake
|
||||
|
||||
if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
||||
if (NOT NO_API)
|
||||
target_include_directories(torch_cpu PRIVATE
|
||||
target_include_directories(torch PRIVATE
|
||||
${TORCH_SRC_DIR}/csrc/api
|
||||
${TORCH_SRC_DIR}/csrc/api/include)
|
||||
endif()
|
||||
|
|
@ -698,7 +686,7 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
set(TORCH_CUDA_LIBRARIES
|
||||
${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib
|
||||
${CUDA_LIBRARIES})
|
||||
target_include_directories(torch_cuda PUBLIC "${NVTOOLEXT_HOME}/include")
|
||||
target_include_directories(torch PUBLIC "${NVTOOLEXT_HOME}/include")
|
||||
|
||||
elseif(APPLE)
|
||||
set(TORCH_CUDA_LIBRARIES
|
||||
|
|
@ -713,6 +701,8 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
${LIBNVTOOLSEXT}
|
||||
${CUDA_LIBRARIES})
|
||||
endif()
|
||||
|
||||
target_compile_definitions(torch PRIVATE USE_CUDA)
|
||||
endif()
|
||||
|
||||
|
||||
|
|
@ -723,7 +713,7 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
${TORCH_ROOT}/aten/src
|
||||
${CMAKE_CURRENT_BINARY_DIR}/aten/src
|
||||
${CMAKE_BINARY_DIR}/aten/src)
|
||||
target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
|
||||
target_include_directories(torch PRIVATE ${TH_CPU_INCLUDE})
|
||||
|
||||
set(ATen_CPU_INCLUDE
|
||||
${TORCH_ROOT}/aten/src
|
||||
|
|
@ -733,16 +723,16 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
|||
|
||||
IF (USE_TBB)
|
||||
list(APPEND ATen_CPU_INCLUDE ${TBB_ROOT_DIR}/include)
|
||||
target_link_libraries(torch_cpu PUBLIC tbb)
|
||||
target_link_libraries(torch PUBLIC tbb)
|
||||
ENDIF()
|
||||
|
||||
|
||||
target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE})
|
||||
target_include_directories(torch PRIVATE ${ATen_CPU_INCLUDE})
|
||||
|
||||
target_include_directories(torch_cpu PRIVATE
|
||||
target_include_directories(torch PRIVATE
|
||||
${TORCH_SRC_DIR}/csrc)
|
||||
|
||||
target_include_directories(torch_cpu PRIVATE
|
||||
target_include_directories(torch PRIVATE
|
||||
${TORCH_ROOT}/third_party/miniz-2.0.8)
|
||||
|
||||
|
||||
|
|
@ -825,7 +815,7 @@ endif()
|
|||
|
||||
|
||||
if (NOT NO_API)
|
||||
target_include_directories(torch_cpu PUBLIC
|
||||
target_include_directories(torch PUBLIC
|
||||
$<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api>
|
||||
$<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api/include>)
|
||||
endif()
|
||||
|
|
@ -838,17 +828,17 @@ if(USE_OPENMP AND OPENMP_FOUND)
|
|||
message(STATUS "pytorch is compiling with OpenMP. \n"
|
||||
"OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
|
||||
"OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")
|
||||
target_compile_options(torch_cpu INTERFACE ${OpenMP_CXX_FLAGS})
|
||||
target_link_libraries(torch_cpu PRIVATE ${OpenMP_CXX_LIBRARIES})
|
||||
target_compile_options(torch INTERFACE ${OpenMP_CXX_FLAGS})
|
||||
target_link_libraries(torch PRIVATE ${OpenMP_CXX_LIBRARIES})
|
||||
endif()
|
||||
|
||||
|
||||
if(USE_ROCM)
|
||||
target_compile_definitions(torch_hip PRIVATE
|
||||
target_compile_definitions(torch PRIVATE
|
||||
USE_ROCM
|
||||
__HIP_PLATFORM_HCC__
|
||||
)
|
||||
target_include_directories(torch_hip PRIVATE
|
||||
target_include_directories(torch PRIVATE
|
||||
/opt/rocm/include
|
||||
/opt/rocm/hcc/include
|
||||
/opt/rocm/rocblas/include
|
||||
|
|
@ -858,11 +848,11 @@ endif()
|
|||
|
||||
if (NOT INTERN_BUILD_MOBILE OR BUILD_CAFFE2_MOBILE)
|
||||
caffe2_interface_library(caffe2_protos caffe2_protos_whole)
|
||||
target_link_libraries(torch_cpu PRIVATE caffe2_protos_whole)
|
||||
target_link_libraries(torch PRIVATE caffe2_protos_whole)
|
||||
if (${CAFFE2_LINK_LOCAL_PROTOBUF})
|
||||
target_link_libraries(torch_cpu INTERFACE protobuf::libprotobuf)
|
||||
target_link_libraries(torch INTERFACE protobuf::libprotobuf)
|
||||
else()
|
||||
target_link_libraries(torch_cpu PUBLIC protobuf::libprotobuf)
|
||||
target_link_libraries(torch PUBLIC protobuf::libprotobuf)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
@ -870,38 +860,24 @@ if (USE_OPENMP AND OPENMP_FOUND)
|
|||
message(STATUS "Caffe2 is compiling with OpenMP. \n"
|
||||
"OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
|
||||
"OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")
|
||||
target_link_libraries(torch_cpu PRIVATE ${OpenMP_CXX_LIBRARIES})
|
||||
target_link_libraries(torch PRIVATE ${OpenMP_CXX_LIBRARIES})
|
||||
endif()
|
||||
|
||||
target_link_libraries(torch_cpu PUBLIC c10)
|
||||
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
|
||||
target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE})
|
||||
target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}")
|
||||
target_link_libraries(torch PUBLIC c10)
|
||||
target_link_libraries(torch PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch PRIVATE ${Caffe2_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
|
||||
target_include_directories(torch INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
target_include_directories(torch PRIVATE ${Caffe2_CPU_INCLUDE})
|
||||
target_include_directories(torch SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}")
|
||||
# Set standard properties on the target
|
||||
torch_set_target_props(torch_cpu)
|
||||
torch_set_target_props(torch)
|
||||
|
||||
|
||||
target_link_libraries(torch PUBLIC torch_cpu)
|
||||
if(USE_CUDA)
|
||||
target_link_libraries(torch PUBLIC torch_cuda)
|
||||
elseif(USE_ROCM)
|
||||
target_link_libraries(torch PUBLIC torch_hip)
|
||||
endif()
|
||||
|
||||
|
||||
target_compile_options(torch_cpu PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
|
||||
if(USE_CUDA)
|
||||
target_compile_options(torch_cuda PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
|
||||
# NB: This must be target_compile_definitions, not target_compile_options,
|
||||
# as the latter is not respected by nvcc
|
||||
target_compile_definitions(torch_cuda PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
|
||||
elseif(USE_ROCM)
|
||||
target_compile_options(torch_hip PRIVATE "-DTORCH_HIP_BUILD_MAIN_LIB")
|
||||
target_compile_definitions(torch_hip PRIVATE "-DTORCH_HIP_BUILD_MAIN_LIB")
|
||||
endif()
|
||||
target_compile_options(torch PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
|
||||
# NB: This must be target_compile_definitions, not target_compile_options,
|
||||
# as the latter is not respected by nvcc
|
||||
target_compile_definitions(torch PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
|
||||
|
||||
|
||||
# ATen parallelism settings
|
||||
|
|
@ -916,21 +892,21 @@ endif()
|
|||
|
||||
message(STATUS "Using ATen parallel backend: ${ATEN_THREADING}")
|
||||
if ("${ATEN_THREADING}" STREQUAL "OMP")
|
||||
target_compile_definitions(torch_cpu PUBLIC "-DAT_PARALLEL_OPENMP=1")
|
||||
target_compile_definitions(torch PUBLIC "-DAT_PARALLEL_OPENMP=1")
|
||||
elseif ("${ATEN_THREADING}" STREQUAL "NATIVE")
|
||||
target_compile_definitions(torch_cpu PUBLIC "-DAT_PARALLEL_NATIVE=1")
|
||||
target_compile_definitions(torch PUBLIC "-DAT_PARALLEL_NATIVE=1")
|
||||
elseif ("${ATEN_THREADING}" STREQUAL "TBB")
|
||||
if (NOT USE_TBB)
|
||||
message(FATAL_ERROR "Using TBB backend but USE_TBB is off")
|
||||
endif()
|
||||
target_compile_definitions(torch_cpu PUBLIC "-DAT_PARALLEL_NATIVE_TBB=1")
|
||||
target_compile_definitions(torch PUBLIC "-DAT_PARALLEL_NATIVE_TBB=1")
|
||||
else()
|
||||
message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
|
||||
endif()
|
||||
set(EXPERIMENTAL_SINGLE_THREAD_POOL "0" CACHE STRING
|
||||
"Experimental option to use a single thread pool for inter- and intra-op parallelism")
|
||||
if ("${EXPERIMENTAL_SINGLE_THREAD_POOL}")
|
||||
target_compile_definitions(torch_cpu PUBLIC "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
|
||||
target_compile_definitions(torch PUBLIC "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
|
||||
endif()
|
||||
|
||||
if (MSVC AND NOT BUILD_SHARED_LIBS)
|
||||
|
|
@ -1000,20 +976,14 @@ if (MSVC AND NOT BUILD_SHARED_LIBS)
|
|||
#
|
||||
# NB: This must be target_compile_definitions, not target_compile_options,
|
||||
# as the latter is not respected by nvcc
|
||||
target_compile_definitions(torch_cpu PUBLIC "AT_CORE_STATIC_WINDOWS=1")
|
||||
target_compile_definitions(torch PUBLIC "AT_CORE_STATIC_WINDOWS=1")
|
||||
endif()
|
||||
if (MSVC AND BUILD_SHARED_LIBS)
|
||||
# ONNX is linked statically and needs to be exported from this library
|
||||
# to be used externally. Make sure that references match the export.
|
||||
target_compile_options(torch_cpu PRIVATE "-DONNX_BUILD_MAIN_LIB")
|
||||
target_compile_options(torch PRIVATE "-DONNX_BUILD_MAIN_LIB")
|
||||
endif()
|
||||
|
||||
install(TARGETS torch_cpu EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
if (USE_CUDA)
|
||||
install(TARGETS torch_cuda EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
elseif (USE_ROCM)
|
||||
install(TARGETS torch_hip EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
endif()
|
||||
install(TARGETS torch EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
|
||||
|
||||
|
|
@ -1025,33 +995,28 @@ endif()
|
|||
|
||||
# Install PDB files for MSVC builds
|
||||
if (MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:torch_cpu> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
|
||||
if(USE_CUDA)
|
||||
install(FILES $<TARGET_PDB_FILE:torch_cuda> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
|
||||
elseif(USE_ROCM)
|
||||
install(FILES $<TARGET_PDB_FILE:torch_hip> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
|
||||
endif()
|
||||
install(FILES $<TARGET_PDB_FILE:torch> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
|
||||
endif()
|
||||
|
||||
# ---[ CUDA library.
|
||||
if(USE_CUDA)
|
||||
|
||||
target_link_libraries(torch_cuda INTERFACE torch::cudart)
|
||||
target_link_libraries(torch_cuda PUBLIC c10_cuda)
|
||||
target_link_libraries(torch INTERFACE torch::cudart)
|
||||
target_link_libraries(torch PUBLIC c10_cuda)
|
||||
|
||||
target_link_libraries(torch_cuda PUBLIC ${TORCH_CUDA_LIBRARIES})
|
||||
target_link_libraries(torch PUBLIC ${TORCH_CUDA_LIBRARIES})
|
||||
|
||||
target_include_directories(
|
||||
torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
torch INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
target_include_directories(
|
||||
torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE})
|
||||
torch PRIVATE ${Caffe2_GPU_INCLUDE})
|
||||
target_link_libraries(
|
||||
torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
|
||||
torch PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
|
||||
|
||||
# These public dependencies must go after the previous dependencies, as the
|
||||
# order of the libraries in the linker call matters here when statically
|
||||
# linking; libculibos and cublas must be last.
|
||||
target_link_libraries(torch_cuda PUBLIC torch_cpu ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch PUBLIC ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
|
||||
|
||||
|
||||
endif()
|
||||
|
|
@ -1072,8 +1037,8 @@ if(USE_ROCM)
|
|||
hip_include_directories(${Caffe2_HIP_INCLUDE})
|
||||
|
||||
# Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
|
||||
target_compile_options(torch_hip PRIVATE ${HIP_CXX_FLAGS})
|
||||
target_link_libraries(torch_hip PUBLIC c10_hip)
|
||||
target_compile_options(torch PRIVATE ${HIP_CXX_FLAGS})
|
||||
target_link_libraries(torch PUBLIC c10_hip)
|
||||
|
||||
if(NOT INTERN_BUILD_MOBILE)
|
||||
# TODO: Cut this over to ATEN_HIP_FILES_GEN_LIB. At the moment, we
|
||||
|
|
@ -1081,13 +1046,13 @@ if(USE_ROCM)
|
|||
# NB: This dependency must be PRIVATE, because we don't install
|
||||
# ATEN_CUDA_FILES_GEN_LIB (it's a synthetic target just to get the
|
||||
# correct dependency from generated files.)
|
||||
target_link_libraries(torch_hip PRIVATE ATEN_CUDA_FILES_GEN_LIB)
|
||||
target_link_libraries(torch PRIVATE ATEN_CUDA_FILES_GEN_LIB)
|
||||
endif()
|
||||
target_link_libraries(torch_hip PUBLIC torch_cpu ${Caffe2_HIP_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch PUBLIC ${Caffe2_HIP_DEPENDENCY_LIBS})
|
||||
|
||||
# Since PyTorch files contain HIP headers, this is also needed to capture the includes.
|
||||
target_include_directories(torch_hip PRIVATE ${Caffe2_HIP_INCLUDE})
|
||||
target_include_directories(torch_hip INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
target_include_directories(torch PRIVATE ${Caffe2_HIP_INCLUDE})
|
||||
target_include_directories(torch INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
endif()
|
||||
|
||||
# ---[ Test binaries.
|
||||
|
|
|
|||
|
|
@ -29,11 +29,40 @@
|
|||
#include "c10/cuda/CUDAMathCompat.h"
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
|
||||
#define CAFFE2_CUDA_EXPORT C10_EXPORT
|
||||
#define CAFFE2_CUDA_API TORCH_CUDA_API
|
||||
// Defines CAFFE2_CUDA_EXPORT and CAFFE2_CUDA_IMPORT. On Windows, this
|
||||
// corresponds to different declarations (dllexport and dllimport). On
|
||||
// Linux/Mac, it just resolves to the same "default visibility" setting.
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(CAFFE2_BUILD_SHARED_LIBS)
|
||||
#define CAFFE2_CUDA_EXPORT __declspec(dllexport)
|
||||
#define CAFFE2_CUDA_IMPORT __declspec(dllimport)
|
||||
#else
|
||||
#define CAFFE2_CUDA_EXPORT
|
||||
#define CAFFE2_CUDA_IMPORT
|
||||
#endif
|
||||
#else
|
||||
#if defined(__GNUC__)
|
||||
#define CAFFE2_CUDA_EXPORT __attribute__((__visibility__("default")))
|
||||
#else
|
||||
#define CAFFE2_CUDA_EXPORT
|
||||
#endif
|
||||
#define CAFFE2_CUDA_IMPORT CAFFE2_CUDA_EXPORT
|
||||
#endif
|
||||
|
||||
#define CAFFE2_HIP_EXPORT C10_EXPORT
|
||||
#define CAFFE2_HIP_API TORCH_HIP_API
|
||||
// CAFFE2_CUDA_API is a macro that, depends on whether you are building the
|
||||
// main caffe2 library or not, resolves to either CAFFE2_CUDA_EXPORT or
|
||||
// CAFFE2_CUDA_IMPORT.
|
||||
//
|
||||
// This is used in e.g. Caffe2's protobuf files: when building the main library,
|
||||
// it is defined as CAFFE2_CUDA_EXPORT to fix a Windows global-variable-in-dll
|
||||
// issue, and for anyone dependent on Caffe2 it will be defined as
|
||||
// CAFFE2_CUDA_IMPORT.
|
||||
|
||||
#ifdef CAFFE2_BUILD_MAIN_LIB
|
||||
#define CAFFE2_CUDA_API CAFFE2_CUDA_EXPORT
|
||||
#else
|
||||
#define CAFFE2_CUDA_API CAFFE2_CUDA_IMPORT
|
||||
#endif
|
||||
|
||||
// This is a macro defined for cuda fp16 support. In default, cuda fp16 is
|
||||
// supported by NVCC 7.5, but it is also included in the Tegra X1 platform with
|
||||
|
|
|
|||
|
|
@ -26,21 +26,21 @@ struct BlobState {
|
|||
is_tensor(is_tensor) {}
|
||||
};
|
||||
|
||||
CAFFE2_API std::string buildBlobNameFromDbKey(
|
||||
std::string buildBlobNameFromDbKey(
|
||||
const std::string& dbKey,
|
||||
const std::string& strip_prefix = "",
|
||||
const std::string& add_prefix = "");
|
||||
|
||||
// We are tracking sizes of already read tensor parts while reading data
|
||||
// chunks. This way we can make sure that all chunks were loaded in the end.
|
||||
CAFFE2_API void ProcessBlob(
|
||||
void ProcessBlob(
|
||||
Blob* blob,
|
||||
const BlobProto& proto,
|
||||
std::unordered_map<std::string, BlobState>* blob_states_ptr,
|
||||
const std::string& key,
|
||||
int* loaded_blobs);
|
||||
|
||||
CAFFE2_API void validateBlobStates(
|
||||
void validateBlobStates(
|
||||
const std::unordered_map<std::string, BlobState>& blob_states);
|
||||
|
||||
} // namespace load_save_op_util
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ Pod::Spec.new do |s|
|
|||
end
|
||||
s.user_target_xcconfig = {
|
||||
'HEADER_SEARCH_PATHS' => '$(inherited) "$(PODS_ROOT)/LibTorch/install/include/"',
|
||||
'OTHER_LDFLAGS' => '-force_load "$(PODS_ROOT)/LibTorch/install/lib/libtorch.a" -force_load "$(PODS_ROOT)/LibTorch/install/lib/libtorch_cpu.a"',
|
||||
'OTHER_LDFLAGS' => '-force_load "$(PODS_ROOT)/LibTorch/install/lib/libtorch.a"',
|
||||
'CLANG_CXX_LANGUAGE_STANDARD' => 'c++11',
|
||||
'CLANG_CXX_LIBRARY' => 'libc++'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ target.resources_build_phase.add_file_reference(config_file_ref, true)
|
|||
|
||||
puts "Linking static libraries..."
|
||||
target.frameworks_build_phases.clear
|
||||
libs = ['libc10.a', 'libclog.a', 'libnnpack.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a']
|
||||
libs = ['libc10.a', 'libclog.a', 'libnnpack.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch.a']
|
||||
for lib in libs do
|
||||
path = "#{install_path}/lib/#{lib}"
|
||||
if File.exist?(path)
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ end
|
|||
|
||||
# link static libraries
|
||||
target.frameworks_build_phases.clear
|
||||
libs = ['libc10.a', 'libclog.a', 'libnnpack.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a']
|
||||
libs = ['libc10.a', 'libclog.a', 'libnnpack.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch.a']
|
||||
for lib in libs do
|
||||
path = "#{install_path}/lib/#{lib}"
|
||||
if File.exist?(path)
|
||||
|
|
|
|||
|
|
@ -458,7 +458,6 @@ def CppExtension(name, sources, *args, **kwargs):
|
|||
libraries = kwargs.get('libraries', [])
|
||||
libraries.append('c10')
|
||||
libraries.append('torch')
|
||||
libraries.append('torch_cpu')
|
||||
libraries.append('torch_python')
|
||||
libraries.append('_C')
|
||||
kwargs['libraries'] = libraries
|
||||
|
|
@ -504,8 +503,6 @@ def CUDAExtension(name, sources, *args, **kwargs):
|
|||
if IS_WINDOWS:
|
||||
libraries.append('c10')
|
||||
libraries.append('c10_cuda')
|
||||
libraries.append('torch_cpu')
|
||||
libraries.append('torch_cuda')
|
||||
libraries.append('torch')
|
||||
libraries.append('torch_python')
|
||||
libraries.append('_C')
|
||||
|
|
@ -946,11 +943,6 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose):
|
|||
lib_path = os.path.join(torch_path, 'lib')
|
||||
|
||||
extra_ldflags.append('c10.lib')
|
||||
if with_cuda:
|
||||
extra_ldflags.append('c10_cuda.lib')
|
||||
extra_ldflags.append('torch_cpu.lib')
|
||||
if with_cuda:
|
||||
extra_ldflags.append('torch_cuda.lib')
|
||||
extra_ldflags.append('torch.lib')
|
||||
extra_ldflags.append('torch_python.lib')
|
||||
extra_ldflags.append('_C.lib')
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user