Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Remove many caffe2::TIndex and replace them with int64_t (#11943)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11943

See title

Reviewed By: ezyang

Differential Revision: D9992645

fbshipit-source-id: e8f80d6ea762971513e5e8072975ceea53e1f11a
This commit is contained in:
parent 5d0f1c3c8f
commit a6630e25af
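The change below is intentionally mechanical: at the time of this commit, caffe2's TIndex was, to the best of my knowledge, a plain alias for int64_t, so every replaced declaration keeps the same type and ABI and only the spelling changes. A minimal sketch of that idea follows; the alias shown here is an assumption for illustration, not a quote of any caffe2 header.

#include <cstdint>
#include <type_traits>
#include <vector>

// Assumed historical alias (illustrative only): caffe2 declared something like
//   typedef int64_t TIndex;
// so the two vector types below are one and the same type.
typedef int64_t TIndex;

static_assert(std::is_same<TIndex, int64_t>::value,
              "the alias and int64_t are the same type, so the rewrite is a pure spelling change");

int main() {
  std::vector<TIndex> old_style{1, 2, 3, 4};
  std::vector<int64_t> new_style{1, 2, 3, 4};
  // Identical types: assignment compiles with no conversion.
  new_style = old_style;
  return 0;
}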
@@ -139,7 +139,7 @@ BENCHMARK(BM_cudaStreamWaitEventThenStreamSynchronize);
 
 static void BM_CudaPointerAffinity(benchmark::State& state) {
 CAFFE2_SKIP_IF_NO_GPU;
-Tensor tensor(vector<TIndex>{1, 2, 3, 4}, CUDA);
+Tensor tensor(vector<int64_t>{1, 2, 3, 4}, CUDA);
 float* ptr = tensor.mutable_data<float>();
 while (state.KeepRunning()) {
 volatile int id = GetGPUIDForPointer(ptr);

@@ -144,7 +144,7 @@ private:
 }
 template <typename T>
 void assignToValue(Tensor* dst, T v) {
-dst->Resize(std::vector<TIndex>());
+dst->Resize(std::vector<int64_t>());
 math::Set(1, v, dst->template mutable_data<T>(), &context_);
 }
 int findImplementation(const OperatorDef& operator_def) {

@@ -75,7 +75,7 @@ class AllgatherOp final : public Operator<Context> {
 auto comm_size =
 OperatorBase::Input<std::shared_ptr<::gloo::Context>>(0)->size;
 const auto dims =
-std::vector<TIndex>(1, (InputSize() - 1) * Input(1).size() * comm_size);
+std::vector<int64_t>(1, (InputSize() - 1) * Input(1).size() * comm_size);
 Output(0)->Resize(dims);
 
 // Store which inputs/outputs this instance initialized with
@@ -269,7 +269,7 @@ void NCCL<T>::AllGather(const NCCLExecution& ex) {
 ex,
 [n](const NCCLElement& ctx) {
 CAFFE_ENFORCE_NE(ctx.src, ctx.dst);
-std::vector<TIndex> dims;
+std::vector<int64_t> dims;
 dims.reserve(ctx.src->ndim() + 1);
 dims.push_back(n);
 for (auto d : ctx.src->dims()) {
@@ -307,7 +307,7 @@ void NCCL<T>::ReduceScatter(const NCCLExecution& ex) {
 [](const NCCLElement& ctx) {
 CAFFE_ENFORCE_NE(ctx.src, ctx.dst);
 const auto& srcDims = ctx.src->dims();
-std::vector<TIndex> dstDims(srcDims.begin() + 1, srcDims.end());
+std::vector<int64_t> dstDims(srcDims.begin() + 1, srcDims.end());
 ctx.dst->Resize(dstDims);
 ctx.dst->template mutable_data<T>();
 },

@@ -15,7 +15,7 @@ namespace {
 // Otherwise, return the product of CHW dimensions
 int64_t CheckDims(
 const nvinfer1::Dims& nv_dims,
-const std::vector<TIndex>& c2_dims) {
+const std::vector<int64_t>& c2_dims) {
 if (nv_dims.nbDims + 1 != c2_dims.size()) {
 CAFFE_THROW(
 "Mismatched dimensions between TRT input (",
@@ -115,7 +115,7 @@ TensorRTOp::TensorRTOp(const OperatorDef& operator_def, Workspace* ws)
 const std::string key = MakeString("output_size_hint_", output_idx);
 auto output_size_hint = OperatorBase::GetRepeatedArgument<int>(key);
 if (!output_size_hint.empty()) {
-std::vector<TIndex> dims;
+std::vector<int64_t> dims;
 for (const auto v : output_size_hint) {
 dims.push_back(v);
 }
@@ -130,17 +130,17 @@ TensorRTOp::TensorRTOp(const OperatorDef& operator_def, Workspace* ws)
 
 void TensorRTOp::MaybeAdjustOutputShape(
 int output_idx,
-std::vector<TIndex>* dims) {
+std::vector<int64_t>* dims) {
 const auto it = output_size_hints_.find(output_idx);
 if (it != output_size_hints_.end()) {
 const auto& dims_hint = it->second;
 auto total_trt = std::accumulate(
-dims->begin(), dims->end(), (TIndex)(1), std::multiplies<TIndex>());
+dims->begin(), dims->end(), (int64_t)(1), std::multiplies<int64_t>());
 auto total_c2 = std::accumulate(
 dims_hint.begin(),
 dims_hint.end(),
-(TIndex)(1),
-std::multiplies<TIndex>());
+(int64_t)(1),
+std::multiplies<int64_t>());
 CAFFE_ENFORCE_EQ(
 total_trt,
 total_c2,
@@ -204,7 +204,7 @@ bool TensorRTOp::RunOnDevice() {
 } else {
 // output, we need to allocate the output tensor at first batch run
 auto* output_tensor = Output(output_idx);
-std::vector<TIndex> tensor_dims;
+std::vector<int64_t> tensor_dims;
 tensor_dims.push_back(N);
 int64_t chw = 1;
 for (int i = 0; i < dims.nbDims; ++i) {
@@ -17,13 +17,13 @@ class TensorRTOp final : public Operator<CUDAContext> {
 virtual ~TensorRTOp() noexcept {}
 
 private:
-void MaybeAdjustOutputShape(int output_idx, std::vector<TIndex>* dims);
+void MaybeAdjustOutputShape(int output_idx, std::vector<int64_t>* dims);
 
 tensorrt::TrtLogger logger_;
 int max_batch_size_;
 std::vector<nvinfer1::Dims> nv_dims_;
 std::vector<bool> is_input_;
-std::unordered_map<int, std::vector<TIndex>> output_size_hints_;
+std::unordered_map<int, std::vector<int64_t>> output_size_hints_;
 std::shared_ptr<nvinfer1::ICudaEngine> trt_engine_{nullptr};
 std::shared_ptr<nvinfer1::IExecutionContext> trt_executor_{nullptr};
 bool batch_warning_issued_{false};

@@ -139,7 +139,7 @@ void TensorSerializer::SerializeWithChunkSize(
 // Serialize whole vector. If vector is empty, it's shape still needs to be
 // serialized in empty proto
 for (size_t chunkBegin = 0;
-chunkBegin < std::max(tensor.size(), static_cast<TIndex>(1));
+chunkBegin < std::max(tensor.size(), static_cast<int64_t>(1));
 chunkBegin += chunk_size) {
 VLOG(2) << "Starting a chunk at " << chunkBegin;
 #ifndef __ANDROID__
@@ -374,8 +374,8 @@ void TensorDeserializer::Deserialize(const TensorProto& proto, Tensor* tensor) {
 tensor->GetStaticContext()->CreateContext(proto.device_detail());
 auto context = uniq_ptr.get();
 context->SwitchToDevice(0);
-vector<TIndex> dims;
-for (const TIndex d : proto.dims()) {
+vector<int64_t> dims;
+for (const int64_t d : proto.dims()) {
 dims.push_back(d);
 }
 tensor->Resize(dims);
@@ -557,9 +557,9 @@ TEST(TensorTest, TensorNonFundamentalTypeClone) {
 
 TEST(TensorTest, Tensor64BitDimension) {
 // Initialize a large tensor.
-TIndex large_number =
+int64_t large_number =
 static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
-Tensor tensor(vector<TIndex>{large_number}, CPU);
+Tensor tensor(vector<int64_t>{large_number}, CPU);
 EXPECT_EQ(tensor.ndim(), 1);
 EXPECT_EQ(tensor.dim(0), large_number);
 EXPECT_EQ(tensor.size(), large_number);
@@ -589,9 +589,9 @@ TEST(TensorTest, Tensor64BitDimension) {
 }
 
 TEST(TensorDeathTest, CannotCastDownLargeDims) {
-TIndex large_number =
+int64_t large_number =
 static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
-Tensor tensor(vector<TIndex>{large_number}, CPU);
+Tensor tensor(vector<int64_t>{large_number}, CPU);
 EXPECT_EQ(tensor.ndim(), 1);
 EXPECT_EQ(tensor.dim(0), large_number);
 ASSERT_THROW(tensor.dim32(0), EnforceNotMet);
@@ -694,7 +694,7 @@ TEST(TensorTest, TensorSerialization_CustomType) {
 }
 
 TEST(TensorTest, Half) {
-const TIndex kSize = 3000000;
+const int64_t kSize = 3000000;
 Blob blob;
 TensorCPU* tensor = blob.GetMutableTensor(CPU);
 tensor->Resize(kSize);

@@ -145,7 +145,7 @@ using EnforceNotMet = at::Error;
 * functions to caffe2::enforce_detail namespace. For example:
 *
 * namespace caffe2 { namespace enforce_detail {
-* inline EnforceFailMessage IsVector(const vector<TIndex>& shape) {
+* inline EnforceFailMessage IsVector(const vector<int64_t>& shape) {
 * if (shape.size() == 1) { return EnforceOK(); }
 * return MakeString("Shape ", shape, " is not a vector");
 * }
@@ -581,7 +581,7 @@ TensorShapes InferBlobShapesAndTypesFromWorkspace(
 }
 
 TensorShapes InferBlobShapesAndTypesFromMap(
-const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
+const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
 const vector<NetDef*>& nets) {
 CaffeMap<string, TensorShape> blob_desc;
 // Populate shapes from known blobs
@@ -597,7 +597,7 @@ TensorShapes InferBlobShapesAndTypesFromMap(
 }
 
 TensorShapes InferBlobShapesAndTypesFromMap(
-const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
+const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
 const CaffeMap<std::string, TensorProto_DataType>& blob_types,
 const vector<NetDef*>& nets) {
 CaffeMap<string, TensorShape> blob_desc;

@@ -700,7 +700,7 @@ struct DispatchHelper<FixedValues<FirstVal, Values...>, ExtraArgs...> {
 template <typename... ExtraArgs>
 struct DispatchHelper<FixedValues<>, ExtraArgs...> {
 template <typename Op>
-static bool call(Op* op, TIndex /*size*/) {
+static bool call(Op* op, int64_t /*size*/) {
 return op->template DoRunWithValue<ExtraArgs..., -1>();
 }
 };
@@ -973,11 +973,11 @@ CAFFE2_API TensorShapes InferBlobShapesAndTypesFromWorkspace(
 const vector<NetDef*>& nets);
 
 CAFFE2_API TensorShapes InferBlobShapesAndTypesFromMap(
-const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
+const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
 const vector<NetDef*>& nets);
 
 CAFFE2_API TensorShapes InferBlobShapesAndTypesFromMap(
-const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
+const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
 const CaffeMap<std::string, TensorProto_DataType>& blob_types,
 const vector<NetDef*>& nets);
 
@@ -331,7 +331,7 @@ int OpSchema::CalculateOutput(int num_input) const {
 }
 
 static void SparseLengthsFillerHelper(
-const std::vector<std::vector<TIndex>>& shapes,
+const std::vector<std::vector<int64_t>>& shapes,
 size_t value_index,
 size_t length_index,
 std::vector<TensorFiller>* fillers) {
@@ -341,7 +341,7 @@ static void SparseLengthsFillerHelper(
 }
 
 static void SparseSegmentsFillerHelper(
-const std::vector<std::vector<TIndex>>& shapes,
+const std::vector<std::vector<int64_t>>& shapes,
 size_t value_index,
 size_t segment_index,
 std::vector<TensorFiller>* fillers) {
@@ -364,7 +364,7 @@ OpSchema& OpSchema::ValueKeyLengthInputFillers(
 size_t key_index,
 size_t length_index) {
 filler_supplier_ = [this, value_index, key_index, length_index](
-const std::vector<std::vector<TIndex>>& shapes) {
+const std::vector<std::vector<int64_t>>& shapes) {
 auto fillers = SupplyDenseFillers(shapes);
 // fill in the length (value_index is used to get the correct shape)
 SparseLengthsFillerHelper(shapes, key_index, length_index, &fillers);
@@ -383,7 +383,7 @@ OpSchema& OpSchema::ValueLengthInputFillers(
 size_t value_index,
 size_t length_index) {
 filler_supplier_ = [this, value_index, length_index](
-const std::vector<std::vector<TIndex>>& shapes) {
+const std::vector<std::vector<int64_t>>& shapes) {
 auto fillers = SupplyDenseFillers(shapes);
 // fill in the length (value_index is used to get the correct shape)
 SparseLengthsFillerHelper(shapes, value_index, length_index, &fillers);
@@ -394,7 +394,7 @@ OpSchema& OpSchema::ValueLengthInputFillers(
 
 OpSchema& OpSchema::DisallowInputFillers() {
 filler_supplier_ =
-[this](const std::vector<std::vector<TIndex>>& /* unused */) {
+[this](const std::vector<std::vector<int64_t>>& /* unused */) {
 throw std::invalid_argument(type_ + " does not have input fillers");
 return std::vector<TensorFiller>();
 };
@@ -402,12 +402,12 @@ OpSchema& OpSchema::DisallowInputFillers() {
 }
 
 std::vector<TensorFiller> OpSchema::InputFillers(
-const std::vector<std::vector<TIndex>>& shapes) const {
+const std::vector<std::vector<int64_t>>& shapes) const {
 return filler_supplier_(shapes);
 }
 
 std::vector<TensorFiller> OpSchema::SupplyDenseFillers(
-const std::vector<std::vector<TIndex>>& shapes) {
+const std::vector<std::vector<int64_t>>& shapes) {
 std::vector<TensorFiller> fillers;
 for (const auto& shape : shapes) {
 fillers.emplace_back(shape);
@@ -383,11 +383,11 @@ class CAFFE2_API OpSchema {
 OpSchema& DisallowInputFillers();
 
 std::vector<TensorFiller> InputFillers(
-const std::vector<std::vector<TIndex>>& shapes) const;
+const std::vector<std::vector<int64_t>>& shapes) const;
 
 private:
 std::vector<TensorFiller> SupplyDenseFillers(
-const std::vector<std::vector<TIndex>>& shapes);
+const std::vector<std::vector<int64_t>>& shapes);
 
 private:
 string type_;
@@ -438,9 +438,9 @@ class CAFFE2_API OpSchema {
 };
 
 std::function<std::vector<TensorFiller>(
-const std::vector<std::vector<TIndex>>&)>
+const std::vector<std::vector<int64_t>>&)>
 filler_supplier_ =
-[this](const std::vector<std::vector<TIndex>>& shapes) {
+[this](const std::vector<std::vector<int64_t>>& shapes) {
 return SupplyDenseFillers(shapes);
 };
 };
@@ -508,8 +508,8 @@ inline TensorShape CreateTensorShape(
 }
 
 // Helper function
-inline vector<TIndex> GetDimsVector(const TensorShape& shape) {
-vector<TIndex> dims;
+inline vector<int64_t> GetDimsVector(const TensorShape& shape) {
+vector<int64_t> dims;
 for (auto d : shape.dims()) {
 dims.push_back(d);
 }
@@ -212,8 +212,8 @@ class CAFFE2_EXPORT QTensor {
 /**
 * Return product of all dimensions starting from K.
 */
-inline TIndex size_from_dim(int k) const {
-TIndex r = 1;
+inline int64_t size_from_dim(int k) const {
+int64_t r = 1;
 for (int i = k; i < dims_.size(); ++i) {
 r *= dims_[i];
 }
@@ -223,9 +223,9 @@ class CAFFE2_EXPORT QTensor {
 /**
 * Product of all dims up to.
 */
-inline TIndex size_to_dim(int k) const {
+inline int64_t size_to_dim(int k) const {
 CAFFE_ENFORCE(k < dims_.size());
-TIndex r = 1;
+int64_t r = 1;
 for (int i = 0; i < k; ++i) {
 r *= dims_[i];
 }

@@ -77,7 +77,7 @@ void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c) {
 
 int GetGPUIDForPointer(const void* ptr);
 
-vector<TIndex> GetTensorInfo(
+vector<int64_t> GetTensorInfo(
 const void* c,
 size_t* capacity,
 DeviceOption* device) {
@@ -59,7 +59,7 @@ class CAFFE2_API Tensor final {
 * Note that the actual data allocation is not going to be carried out until
 * the first time mutable_data() is called.
 */
-explicit Tensor(const vector<TIndex>& dims, DeviceType type)
+explicit Tensor(const vector<int64_t>& dims, DeviceType type)
 : Tensor(Storage(type)) {
 // TODO: here, we create a Storage
 // and immediately discard it in Resize() since
@@ -96,7 +96,7 @@ class CAFFE2_API Tensor final {
 */
 template <typename T>
 Tensor(
-const vector<TIndex>& dims,
+const vector<int64_t>& dims,
 const vector<T>& values,
 BaseContext* context)
 : Tensor(Storage(context->device_type(), TypeMeta::Make<T>())) {
@@ -115,7 +115,7 @@ class CAFFE2_API Tensor final {
 typename = typename std::enable_if<std::is_scalar<T>::value>::type>
 Tensor(const T& value, BaseContext* context)
 : Tensor(Storage(context->device_type(), TypeMeta::Make<T>())) {
-Resize(std::vector<TIndex>{});
+Resize(std::vector<int64_t>{});
 context->CopyItemsFromCPU(
 storage().dtype(), size(), &value, mutable_data<T>());
 }
@@ -142,15 +142,15 @@ class CAFFE2_API Tensor final {
 impl_.get()->CopyFrom(*src.impl_.get(), context);
 }
 
-void ExtendTo(TIndex num, float growthPct, BaseContext* context) const {
+void ExtendTo(int64_t num, float growthPct, BaseContext* context) const {
 impl_.get()->ExtendTo(num, growthPct, context);
 }
 
-void Extend(TIndex num, float growthPct, BaseContext* context) const {
+void Extend(int64_t num, float growthPct, BaseContext* context) const {
 impl_.get()->Extend(num, growthPct, context);
 }
 
-void ShrinkTo(TIndex outer_dim) const {
+void ShrinkTo(int64_t outer_dim) const {
 impl_.get()->ShrinkTo(outer_dim);
 }
 
@@ -168,7 +168,7 @@ class CAFFE2_API Tensor final {
 impl_.get()->ResizeLike(*src_tensor.impl_.get());
 }
 
-inline void Reshape(const vector<TIndex>& dims) const {
+inline void Reshape(const vector<int64_t>& dims) const {
 impl_.get()->Reshape(dims);
 }
 
@@ -250,7 +250,7 @@ class CAFFE2_API Tensor final {
 return impl_.get()->ndim();
 }
 
-inline TIndex size() const {
+inline int64_t size() const {
 return impl_.get()->size();
 }
 
@@ -266,19 +266,19 @@ class CAFFE2_API Tensor final {
 return impl_.get()->capacity_nbytes();
 }
 
-inline const vector<TIndex>& dims() const {
+inline const vector<int64_t>& dims() const {
 return impl_.get()->dims();
 }
 
-inline TIndex size_from_dim(int k) const {
+inline int64_t size_from_dim(int k) const {
 return impl_.get()->size_from_dim(k);
 }
 
-inline TIndex size_to_dim(int k) const {
+inline int64_t size_to_dim(int k) const {
 return impl_.get()->size_to_dim(k);
 }
 
-inline TIndex size_between_dim(int k, int l) const {
+inline int64_t size_between_dim(int k, int l) const {
 return impl_.get()->size_between_dim(k, l);
 }
 
@@ -311,7 +311,7 @@ class CAFFE2_API Tensor final {
 return impl_.get()->dim32(i);
 }
 
-inline TIndex dim(const int i) const {
+inline int64_t dim(const int i) const {
 return impl_.get()->dim(i);
 }
 
@@ -337,7 +337,7 @@ TypeCall GetTypeCallFunction(TypeIdentifier id);
 void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c);
 
 // Shape call registry
-typedef vector<TIndex> (*TensorInfoCall)(
+typedef vector<int64_t> (*TensorInfoCall)(
 const void*,
 size_t* capacity,
 DeviceOption* device);
@@ -377,7 +377,7 @@ void TensorPrinter::Print(const Tensor& tensor) {
 std::stringstream values_stream;
 // One most likely doesn't want to print int64-number of items for visual
 // inspection, so we cast down to int here.
-int total_count = static_cast<int>(std::min(tensor.size(), TIndex(limit_)));
+int total_count = static_cast<int>(std::min(tensor.size(), int64_t(limit_)));
 const T* tensor_data = tensor.template data<T>();
 for (int i = 0; i < total_count - 1; ++i) {
 values_stream << tensor_data[i] << ",";
@@ -26,17 +26,17 @@ namespace caffe2 {
 class DeviceOption;
 
 /**
-* A utility function to convert vector<int> to vector<TIndex>.
+* A utility function to convert vector<int> to vector<int64_t>.
 */
-inline std::vector<TIndex> ToVectorTIndex(const std::vector<int>& src) {
-return std::vector<TIndex>(src.begin(), src.end());
+inline std::vector<int64_t> ToVectorint64_t(const std::vector<int>& src) {
+return std::vector<int64_t>(src.begin(), src.end());
 }
 
 /**
 * Return product of all dimensions starting from k
 */
-inline TIndex size_from_dim_(int k, const std::vector<TIndex>& dims) {
-TIndex r = 1;
+inline int64_t size_from_dim_(int k, const std::vector<int64_t>& dims) {
+int64_t r = 1;
 for (size_t i = k; i < dims.size(); ++i) {
 r *= dims[i];
 }
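The hunk above is one of the few places where the change is more than a type substitution: the helper itself is renamed from ToVectorTIndex to ToVectorint64_t, so its call sites (for example the Reshape(const std::vector<int>&) overload later in this diff) have to be renamed as well. A hedged, self-contained sketch of the renamed helper and an illustrative call site (the main() usage here is mine, not from the diff):

#include <cstdint>
#include <vector>

// The renamed helper as shown in the hunk above: widen vector<int> into vector<int64_t>.
inline std::vector<int64_t> ToVectorint64_t(const std::vector<int>& src) {
  return std::vector<int64_t>(src.begin(), src.end());
}

int main() {
  std::vector<int> dims32{2, 3, 4};
  // Call sites that previously spelled ToVectorTIndex(dims32) must now use the new name,
  // e.g. Reshape(ToVectorint64_t(dims)) in a later hunk of this diff.
  std::vector<int64_t> dims64 = ToVectorint64_t(dims32);
  return dims64.size() == 3 ? 0 : 1;
}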
@@ -44,9 +44,9 @@ inline TIndex size_from_dim_(int k, const std::vector<TIndex>& dims) {
 }
 
 // Product of all dims up to k (not including dims[k])
-inline TIndex size_to_dim_(int k, const std::vector<TIndex>& dims) {
+inline int64_t size_to_dim_(int k, const std::vector<int64_t>& dims) {
 CAFFE_ENFORCE((unsigned)k <= dims.size());
-TIndex r = 1;
+int64_t r = 1;
 for (int i = 0; i < k; ++i) {
 r *= dims[i];
 }
@@ -54,9 +54,9 @@ inline TIndex size_to_dim_(int k, const std::vector<TIndex>& dims) {
 }
 
 // Product of all dims between k and l (not including dims[k] and dims[l])
-inline TIndex size_between_dim_(int k, int l, const std::vector<TIndex>& dims) {
+inline int64_t size_between_dim_(int k, int l, const std::vector<int64_t>& dims) {
 CAFFE_ENFORCE((unsigned)l < dims.size());
-TIndex r = 1;
+int64_t r = 1;
 if (k < l) {
 for (int i = k + 1; i < l; ++i) {
 r *= dims[i];
@@ -191,7 +191,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 * @brief Extend the outer-most dimension of this tensor
 * to dimension of `num`.
 */
-void ExtendTo(TIndex num, float growthPct, at::BaseContext* context) {
+void ExtendTo(int64_t num, float growthPct, at::BaseContext* context) {
 CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
 CAFFE_ENFORCE_GE_WITH_CALLER(growthPct, 0);
 CAFFE_ENFORCE(context != nullptr, "Context must be provided.");
@@ -207,7 +207,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 * growthPct. This ensures that Extend runs on an amortized O(1) time
 * complexity.
 */
-void Extend(TIndex num, float growthPct, at::BaseContext* context) {
+void Extend(int64_t num, float growthPct, at::BaseContext* context) {
 CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
 CAFFE_ENFORCE_GE_WITH_CALLER(
 num, 0, "`num` must be non-negative for Extend");
@@ -223,8 +223,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 auto newNumel = std::accumulate(
 newDims.begin(),
 newDims.end(),
-static_cast<TIndex>(1),
-std::multiplies<TIndex>());
+static_cast<int64_t>(1),
+std::multiplies<int64_t>());
 if (newNumel * storage_.itemsize() <= storage_.capacity()) {
 dims_ = newDims;
 numel_ = newNumel;
@@ -253,7 +253,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 * This method guarantees that no re-allocations are carried out, which means
 * that the extra capacity after the end of the shurnk tensor is maintained.
 */
-void ShrinkTo(TIndex outer_dim) {
+void ShrinkTo(int64_t outer_dim) {
 CAFFE_ENFORCE_WITH_CALLER(
 is_contiguous_,
 "Right now ShrinkTo is only supported on contiguous Tensor.");
@@ -268,8 +268,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 numel_ = std::accumulate(
 dims_.begin(),
 dims_.end(),
-static_cast<TIndex>(1),
-std::multiplies<TIndex>());
+static_cast<int64_t>(1),
+std::multiplies<int64_t>());
 }
 
 /**
@@ -292,8 +292,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 auto newNumel = std::accumulate(
 newCapacity.begin(),
 newCapacity.end(),
-static_cast<TIndex>(1),
-std::multiplies<TIndex>());
+static_cast<int64_t>(1),
+std::multiplies<int64_t>());
 if (newNumel * storage_.itemsize() <= storage_.capacity()) {
 return;
 }
@@ -365,11 +365,11 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 * Resizes the tensor without touching underlying storage.
 * This requires the total size of the tensor to remains constant.
 */
-inline void Reshape(const std::vector<TIndex>& dims) {
+inline void Reshape(const std::vector<int64_t>& dims) {
 CAFFE_ENFORCE_WITH_CALLER(
 is_contiguous_,
 "Right now Reshape is only supported for contiguous Tensor.");
-TIndex new_size = 1;
+int64_t new_size = 1;
 for (auto d : dims) {
 CAFFE_ENFORCE_GE_WITH_CALLER(d, 0);
 new_size *= d;
@@ -387,7 +387,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 }
 
 inline void Reshape(const std::vector<int>& dims) {
-Reshape(ToVectorTIndex(dims));
+Reshape(ToVectorint64_t(dims));
 }
 
 /**
@@ -674,7 +674,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 /**
 * Returns the size (i.e. the number of items) of the tensor.
 */
-inline TIndex size() const {
+inline int64_t size() const {
 return numel_;
 }
 /**
@@ -701,19 +701,19 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 /**
 * Returns the dimensions of the tensor as a vector.
 */
-inline const std::vector<TIndex>& dims() const {
+inline const std::vector<int64_t>& dims() const {
 return dims_;
 }
 
-inline TIndex size_from_dim(int k) const {
+inline int64_t size_from_dim(int k) const {
 return size_from_dim_(k, dims_);
 }
 
-inline TIndex size_to_dim(int k) const {
+inline int64_t size_to_dim(int k) const {
 return size_to_dim_(k, dims_);
 }
 
-inline TIndex size_between_dim(int k, int l) const {
+inline int64_t size_between_dim(int k, int l) const {
 return size_between_dim_(k, l, dims_);
 }
 
@@ -772,7 +772,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 /**
 * Returns the i-th dimension of the tensor in int.
 *
-* This function returns an int value instead of TIndex, which depending on
+* This function returns an int value instead of int64_t, which depending on
 * the typedef could be int64. If you want int64 dim values, make sure you
 * call dim() instead.
 */
@@ -790,7 +790,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 * must be between 0 (inclusive) and the number of dimensions, otherwise
 * this function will produce a fatal message.
 */
-inline TIndex dim(const int i) const {
+inline int64_t dim(const int i) const {
 #ifndef NDEBUG
 CAFFE_ENFORCE_LT_WITH_CALLER(i, dims_.size(), "Exceeding ndim limit");
 CAFFE_ENFORCE_GE_WITH_CALLER(i, 0, "Cannot have negative dimension index");
@@ -818,9 +818,9 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 
 protected:
 // TODO: change to DimVector
-std::vector<TIndex> dims_; // sizes_
+std::vector<int64_t> dims_; // sizes_
 at::DimVector strides_;
-TIndex numel_ = -1; // numel_
+int64_t numel_ = -1; // numel_
 bool is_contiguous_ = true;
 // we decide to keep reserved_ and it will
 // live in Tensor after the split
@@ -838,7 +838,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 bool SetDims(const std::vector<T>& src) {
 auto old_numel = numel_;
 dims_.resize(src.size());
-TIndex new_numel = 1;
+int64_t new_numel = 1;
 for (size_t i = 0; i < src.size(); ++i) {
 new_numel *= src[i];
 dims_[i] = src[i];
@@ -859,7 +859,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 // TODO(jiayq): maybe rewrite the following functions with initializer list.
 // NVCC does not play well with initializer lists last time, but worth
 // another shot.
-bool SetDims(const TIndex d0) {
+bool SetDims(const int64_t d0) {
 auto old_numel = numel_;
 dims_.resize(1);
 dims_[0] = d0;
@@ -868,7 +868,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 return numel_ != old_numel;
 }
 
-bool SetDims(const TIndex d0, const TIndex d1) {
+bool SetDims(const int64_t d0, const int64_t d1) {
 auto old_numel = numel_;
 dims_.resize(2);
 dims_[0] = d0;
@@ -878,7 +878,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 return numel_ != old_numel;
 }
 
-bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) {
+bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) {
 auto old_numel = numel_;
 dims_.resize(3);
 dims_[0] = d0;
@@ -890,7 +890,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
 }
 
 bool
-SetDims(const TIndex d0, const TIndex d1, const TIndex d2, const TIndex d3) {
+SetDims(const int64_t d0, const int64_t d1, const int64_t d2, const int64_t d3) {
 auto old_numel = numel_;
 dims_.resize(4);
 dims_[0] = d0;
@@ -232,7 +232,7 @@ class MaxPoolRTCOp final : public ConvPoolOpBase<CUDAContext> {
 
 private:
 MaxPoolRTCFunction func_;
-vector<TIndex> input_dims_;
+vector<int64_t> input_dims_;
 };
 
 class MaxPoolGradientRTCOp final : public ConvPoolOpBase<CUDAContext> {
@@ -285,7 +285,7 @@ class MaxPoolGradientRTCOp final : public ConvPoolOpBase<CUDAContext> {
 
 private:
 MaxPoolGradientRTCFunction func_;
-vector<TIndex> input_dims_;
+vector<int64_t> input_dims_;
 };
 
 namespace {

@@ -29,8 +29,8 @@ namespace caffe2 {
 using Shape = std::array<int, N>;
 
 template<int N>
-const std::vector<TIndex>& shape(Shape<N> vs) {
-static thread_local std::vector<TIndex> cache;
+const std::vector<int64_t>& shape(Shape<N> vs) {
+static thread_local std::vector<int64_t> cache;
 cache.resize(vs.size());
 for (auto i = 0; i < vs.size(); ++i) {
 cache[i] = vs[i];
@@ -38,11 +38,11 @@ namespace caffe2 {
 return cache;
 }
 
-inline const std::vector<TIndex>& shape(int i) {
+inline const std::vector<int64_t>& shape(int i) {
 return shape<1>(Shape<1>({i}));
 }
 
-inline const std::vector<TIndex>& shape(int i, int j) {
+inline const std::vector<int64_t>& shape(int i, int j) {
 return shape<2>(Shape<2>({i, j}));
 }
 
@@ -177,7 +177,7 @@ namespace caffe2 {
 Y->template mutable_data<T>(), &context_);
 if (OutputSize() == 2){
 auto* Comp_rate = Output(1);
-Comp_rate->Resize(vector<TIndex>());
+Comp_rate->Resize(vector<int64_t>());
 T* comp_data = Comp_rate->template mutable_data<T>();
 math::Sum<T, Context>(
 Mask.size(), Mask.template data<T>(), comp_data, &context_);
@@ -262,7 +262,7 @@ namespace caffe2 {
 0, dW->template mutable_data<T>(),
 &context_);
 
-comp_r_buf_.Resize(vector<TIndex>());
+comp_r_buf_.Resize(vector<int64_t>());
 T* comp_data = comp_r_buf_.template mutable_data<T>();
 math::Sum<T, Context>(
 Mask.size(), Mask.template data<T>(), comp_data, &context_);

@@ -32,8 +32,8 @@ template<int N>
 using Shape = std::array<int, N>;
 
 template<int N>
-const std::vector<TIndex>& shape(Shape<N> vs) {
-static thread_local std::vector<TIndex> cache;
+const std::vector<int64_t>& shape(Shape<N> vs) {
+static thread_local std::vector<int64_t> cache;
 cache.resize(vs.size());
 for (auto i = 0; i < vs.size(); ++i) {
 cache[i] = vs[i];
@@ -41,11 +41,11 @@ const std::vector<TIndex>& shape(Shape<N> vs) {
 return cache;
 }
 
-inline const std::vector<TIndex>& shape(int i) {
+inline const std::vector<int64_t>& shape(int i) {
 return shape<1>(Shape<1>({i}));
 }
 
-inline const std::vector<TIndex>& shape(int i, int j) {
+inline const std::vector<int64_t>& shape(int i, int j) {
 return shape<2>(Shape<2>({i, j}));
 }
 
@@ -37,9 +37,9 @@ class FunHashOp : public Operator<Context> {
 FunHashOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
 num_outputs_(
-OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
+OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
 num_segments_(
-OperatorBase::GetSingleArgument<TIndex>("num_segments", -1)),
+OperatorBase::GetSingleArgument<int64_t>("num_segments", -1)),
 seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
 CAFFE_ENFORCE(
 OperatorBase::HasArgument("num_outputs"),
@@ -54,7 +54,7 @@ class FunHashOp : public Operator<Context> {
 const auto& seg = Input(2);
 const auto& weight = Input(3);
 
-TIndex num_alpha = 1;
+int64_t num_alpha = 1;
 if (adaptive_) {
 const auto& alpha = Input(4);
 num_alpha = alpha.dim(0);
@@ -62,12 +62,12 @@ class FunHashOp : public Operator<Context> {
 
 const auto* seg_data = seg.template data<int>();
 
-TIndex num_weight = weight.dim(0);
-TIndex num_nz_ent = seg.dim(0);
+int64_t num_weight = weight.dim(0);
+int64_t num_nz_ent = seg.dim(0);
 
-TIndex n_segments = num_segments_;
+int64_t n_segments = num_segments_;
 if (num_segments_ == -1) {
-for (TIndex i = 0; i < num_nz_ent; ++i) {
+for (int64_t i = 0; i < num_nz_ent; ++i) {
 if (seg_data[i] > n_segments) {
 n_segments = seg_data[i];
 }
@@ -85,16 +85,16 @@ class FunHashOp : public Operator<Context> {
 const auto* weight_data = weight.template data<T>();
 const auto* alpha_data = adaptive_ ? Input(4).template data<T>() : 0;
 const auto* val_data = val.template data<T>();
-const auto* key_data = key.template data<TIndex>();
+const auto* key_data = key.template data<int64_t>();
 
-for (TIndex j = 0; j < num_nz_ent; ++j) {
-TIndex cur_seg = seg_data[j];
-TIndex cur_key = key_data[j];
+for (int64_t j = 0; j < num_nz_ent; ++j) {
+int64_t cur_seg = seg_data[j];
+int64_t cur_key = key_data[j];
 T cur_val = val_data[j];
-TIndex output_stride = cur_seg * num_outputs_;
-for (TIndex i = 0; i < num_outputs_; ++i) {
+int64_t output_stride = cur_seg * num_outputs_;
+for (int64_t i = 0; i < num_outputs_; ++i) {
 T sum = 0;
-for (TIndex k = 0; k < num_alpha; ++k) {
+for (int64_t k = 0; k < num_alpha; ++k) {
 uint64_t hash;
 // The hash function takes as input four integers:
 // 1. feature index
@@ -108,7 +108,7 @@ class FunHashOp : public Operator<Context> {
 
 hash_data[3] = INDEX_MAGIC;
 hash = XXH64(hash_data.data(), hash_data.size(), seed_);
-TIndex index = hash % num_weight;
+int64_t index = hash % num_weight;
 
 T cur_weight = weight_data[index];
 #ifdef USE_SIGN
@@ -133,8 +133,8 @@ class FunHashOp : public Operator<Context> {
 }
 
 protected:
-TIndex num_outputs_;
-TIndex num_segments_;
+int64_t num_outputs_;
+int64_t num_segments_;
 uint64_t seed_;
 std::array<uint64_t, 4> hash_data;
 bool adaptive_;
@@ -147,7 +147,7 @@ class FunHashGradientOp : public Operator<Context> {
 FunHashGradientOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
 num_outputs_(
-OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
+OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
 seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
 adaptive_ = (InputSize() == 6);
 }
@@ -159,7 +159,7 @@ class FunHashGradientOp : public Operator<Context> {
 const auto& seg = Input(3);
 const auto& weight = Input(4);
 
-TIndex num_alpha = 1;
+int64_t num_alpha = 1;
 T* grad_alpha_data = 0;
 
 if (adaptive_) {
@@ -173,8 +173,8 @@ class FunHashGradientOp : public Operator<Context> {
 
 const auto* seg_data = seg.template data<int>();
 
-TIndex num_weight = weight.dim(0);
-TIndex num_nz_ent = seg.dim(0);
+int64_t num_weight = weight.dim(0);
+int64_t num_nz_ent = seg.dim(0);
 
 auto* grad_weight = Output(0);
 grad_weight->ResizeLike(weight);
@@ -184,18 +184,18 @@ class FunHashGradientOp : public Operator<Context> {
 const auto* weight_data = weight.template data<T>();
 const auto* alpha_data = adaptive_ ? Input(5).template data<T>() : 0;
 const auto* val_data = val.template data<T>();
-const auto* key_data = key.template data<TIndex>();
+const auto* key_data = key.template data<int64_t>();
 
 memset(grad_weight_data, 0, sizeof(T) * num_weight);
 
-for (TIndex j = 0; j < num_nz_ent; ++j) {
-TIndex cur_seg = seg_data[j];
-TIndex cur_key = key_data[j];
+for (int64_t j = 0; j < num_nz_ent; ++j) {
+int64_t cur_seg = seg_data[j];
+int64_t cur_key = key_data[j];
 T cur_val = val_data[j];
-TIndex grad_out_stride = cur_seg * num_outputs_;
-for (TIndex i = 0; i < num_outputs_; ++i) {
+int64_t grad_out_stride = cur_seg * num_outputs_;
+for (int64_t i = 0; i < num_outputs_; ++i) {
 T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val;
-for (TIndex k = 0; k < num_alpha; ++k) {
+for (int64_t k = 0; k < num_alpha; ++k) {
 uint64_t hash;
 hash_data[0] = cur_key;
 hash_data[1] = i;
@@ -203,7 +203,7 @@ class FunHashGradientOp : public Operator<Context> {
 
 hash_data[3] = INDEX_MAGIC;
 hash = XXH64(hash_data.data(), hash_data.size(), seed_);
-TIndex index = hash % num_weight;
+int64_t index = hash % num_weight;
 
 T cur_grad_out_scale = grad_out_scale;
 #ifdef USE_SIGN
@@ -227,7 +227,7 @@ class FunHashGradientOp : public Operator<Context> {
 }
 
 protected:
-TIndex num_outputs_;
+int64_t num_outputs_;
 uint64_t seed_;
 std::array<uint64_t, 4> hash_data;
 bool adaptive_;
@@ -36,9 +36,9 @@ class SparseFunHashOp : public Operator<Context> {
 SparseFunHashOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
 num_outputs_(
-OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
+OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
 num_segments_(
-OperatorBase::GetSingleArgument<TIndex>("num_segments", -1)),
+OperatorBase::GetSingleArgument<int64_t>("num_segments", -1)),
 seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
 CAFFE_ENFORCE(
 OperatorBase::HasArgument("num_outputs"),
@@ -53,7 +53,7 @@ class SparseFunHashOp : public Operator<Context> {
 const auto& seg = Input(2);
 const auto& weight = Input(3);
 
-TIndex num_alpha = 1;
+int64_t num_alpha = 1;
 if (adaptive_) {
 const auto& alpha = Input(4);
 num_alpha = alpha.dim(0);
@@ -61,12 +61,12 @@ class SparseFunHashOp : public Operator<Context> {
 
 const auto* seg_data = seg.template data<int>();
 
-TIndex num_weight = weight.dim(0);
-TIndex num_nz_ent = seg.dim(0);
+int64_t num_weight = weight.dim(0);
+int64_t num_nz_ent = seg.dim(0);
 
-TIndex n_segments = num_segments_;
+int64_t n_segments = num_segments_;
 if (num_segments_ == -1) {
-for (TIndex i = 0; i < num_nz_ent; ++i) {
+for (int64_t i = 0; i < num_nz_ent; ++i) {
 if (seg_data[i] > n_segments) {
 n_segments = seg_data[i];
 }
@@ -84,16 +84,16 @@ class SparseFunHashOp : public Operator<Context> {
 const auto* weight_data = weight.template data<T>();
 const auto* alpha_data = adaptive_ ? Input(4).template data<T>() : 0;
 const auto* val_data = val.template data<T>();
-const auto* key_data = key.template data<TIndex>();
+const auto* key_data = key.template data<int64_t>();
 
-for (TIndex j = 0; j < num_nz_ent; ++j) {
-TIndex cur_seg = seg_data[j];
-TIndex cur_key = key_data[j];
+for (int64_t j = 0; j < num_nz_ent; ++j) {
+int64_t cur_seg = seg_data[j];
+int64_t cur_key = key_data[j];
 T cur_val = val_data[j];
-TIndex output_stride = cur_seg * num_outputs_;
-for (TIndex i = 0; i < num_outputs_; ++i) {
+int64_t output_stride = cur_seg * num_outputs_;
+for (int64_t i = 0; i < num_outputs_; ++i) {
 T sum = 0;
-for (TIndex k = 0; k < num_alpha; ++k) {
+for (int64_t k = 0; k < num_alpha; ++k) {
 // The hash function takes as input three integers:
 // 1. feature index
 // 2. output index
@@ -108,13 +108,13 @@ class SparseFunHashOp : public Operator<Context> {
 
 #ifdef USE_SIGN
 // Use the least significant bit for sign, the rest for weights.
-TIndex index = (hash >> 1) % num_weight;
+int64_t index = (hash >> 1) % num_weight;
 T cur_weight = weight_data[index];
 if (hash & 1) {
 cur_weight = -cur_weight;
 }
 #else
-TIndex index = hash % num_weight;
+int64_t index = hash % num_weight;
 T cur_weight = weight_data[index];
 #endif
 
@@ -132,8 +132,8 @@ class SparseFunHashOp : public Operator<Context> {
 }
 
 protected:
-TIndex num_outputs_;
-TIndex num_segments_;
+int64_t num_outputs_;
+int64_t num_segments_;
 uint64_t seed_;
 std::array<uint64_t, 4> hash_data;
 bool adaptive_;
@@ -146,7 +146,7 @@ class SparseFunHashGradientOp : public Operator<Context> {
 SparseFunHashGradientOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
 num_outputs_(
-OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
+OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
 seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
 adaptive_ = (InputSize() == 6);
 }
@@ -158,7 +158,7 @@ class SparseFunHashGradientOp : public Operator<Context> {
 const auto& seg = Input(3);
 const auto& weight = Input(4);
 
-TIndex num_alpha = 1;
+int64_t num_alpha = 1;
 T* grad_alpha_data = 0;
 
 if (adaptive_) {
@@ -172,10 +172,10 @@ class SparseFunHashGradientOp : public Operator<Context> {
 
 const auto* seg_data = seg.template data<int>();
 
-TIndex num_weight = weight.dim(0);
-TIndex num_nz_ent = seg.dim(0);
+int64_t num_weight = weight.dim(0);
+int64_t num_nz_ent = seg.dim(0);
 
-TIndex grad_weight_size = num_nz_ent * num_outputs_ * num_alpha;
+int64_t grad_weight_size = num_nz_ent * num_outputs_ * num_alpha;
 auto* grad_weight_val = Output(0);
 grad_weight_val->Resize(grad_weight_size);
 T* grad_weight_val_data = grad_weight_val->template mutable_data<T>();
@@ -183,23 +183,23 @@ class SparseFunHashGradientOp : public Operator<Context> {
 auto* grad_weight_ind = Output(1);
 grad_weight_ind->Resize(grad_weight_size);
 auto* grad_weight_ind_data =
-grad_weight_ind->template mutable_data<TIndex>();
+grad_weight_ind->template mutable_data<int64_t>();
 
 const auto* grad_out_data = grad_out.template data<T>();
 const auto* weight_data = weight.template data<T>();
 const auto* alpha_data = adaptive_ ? Input(5).template data<T>() : 0;
 const auto* val_data = val.template data<T>();
-const auto* key_data = key.template data<TIndex>();
+const auto* key_data = key.template data<int64_t>();
 
-TIndex w_ind = 0;
-for (TIndex j = 0; j < num_nz_ent; ++j) {
-TIndex cur_seg = seg_data[j];
-TIndex cur_key = key_data[j];
+int64_t w_ind = 0;
+for (int64_t j = 0; j < num_nz_ent; ++j) {
+int64_t cur_seg = seg_data[j];
+int64_t cur_key = key_data[j];
 T cur_val = val_data[j];
-TIndex grad_out_stride = cur_seg * num_outputs_;
-for (TIndex i = 0; i < num_outputs_; ++i) {
+int64_t grad_out_stride = cur_seg * num_outputs_;
+for (int64_t i = 0; i < num_outputs_; ++i) {
 T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val;
-for (TIndex k = 0; k < num_alpha; ++k) {
+for (int64_t k = 0; k < num_alpha; ++k) {
 hash_data[0] = cur_key;
 hash_data[1] = i;
 hash_data[2] = k;
@@ -209,12 +209,12 @@ class SparseFunHashGradientOp : public Operator<Context> {
 
 T cur_grad_out_scale = grad_out_scale;
 #ifdef USE_SIGN
-TIndex index = (hash >> 1) % num_weight;
+int64_t index = (hash >> 1) % num_weight;
 if (hash & 1) {
 cur_grad_out_scale = -cur_grad_out_scale;
 }
 #else
-TIndex index = hash % num_weight;
+int64_t index = hash % num_weight;
 #endif
 
 if (adaptive_) {
@@ -232,7 +232,7 @@ class SparseFunHashGradientOp : public Operator<Context> {
 }
 
 protected:
-TIndex num_outputs_;
+int64_t num_outputs_;
 uint64_t seed_;
 std::array<uint64_t, 4> hash_data;
 bool adaptive_;
@@ -36,10 +36,10 @@ class SparseMatrixReshapeOp : public Operator<Context> {
 OperatorBase::HasArgument("new_shape"),
 "Argument `new_shape` is missing.");
 
-vector<TIndex> old_shape =
-OperatorBase::GetRepeatedArgument<TIndex>("old_shape");
-vector<TIndex> new_shape =
-OperatorBase::GetRepeatedArgument<TIndex>("new_shape");
+vector<int64_t> old_shape =
+OperatorBase::GetRepeatedArgument<int64_t>("old_shape");
+vector<int64_t> new_shape =
+OperatorBase::GetRepeatedArgument<int64_t>("new_shape");
 
 CAFFE_ENFORCE(
 old_shape.size() == 2,
@@ -63,7 +63,7 @@ class SparseMatrixReshapeOp : public Operator<Context> {
 old_shape[0] > 0,
 "The first dimension in `old_shape` must be positive.");
 
-TIndex matrix_size = old_shape[0] * old_shape[1];
+int64_t matrix_size = old_shape[0] * old_shape[1];
 
 if (new_shape[0] == -1) {
 CAFFE_ENFORCE(
@@ -106,14 +106,14 @@ class SparseMatrixReshapeOp : public Operator<Context> {
 new_col->Resize(nnz);
 new_row->Resize(nnz);
 
-const auto* old_col_data = old_col.template data<TIndex>();
+const auto* old_col_data = old_col.template data<int64_t>();
 const auto* old_row_data = old_row.template data<int>();
 
-auto* new_col_data = new_col->template mutable_data<TIndex>();
+auto* new_col_data = new_col->template mutable_data<int64_t>();
 auto* new_row_data = new_row->template mutable_data<int>();
 
 for (int i = 0; i < nnz; ++i) {
-TIndex offset = old_row_data[i] * old_stride_ + old_col_data[i];
+int64_t offset = old_row_data[i] * old_stride_ + old_col_data[i];
 new_row_data[i] = offset / new_stride_;
 new_col_data[i] = offset % new_stride_;
 }
@@ -122,8 +122,8 @@ class SparseMatrixReshapeOp : public Operator<Context> {
 }
 
 private:
-TIndex old_stride_;
-TIndex new_stride_;
+int64_t old_stride_;
+int64_t new_stride_;
 };
 
 } // namespace caffe2
@@ -29,9 +29,9 @@ class TTContractionOp final : public Operator<Context> {
 USE_OPERATOR_CONTEXT_FUNCTIONS;
 TTContractionOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
-K_(OperatorBase::GetSingleArgument<TIndex>("K", 0)),
-M_(OperatorBase::GetSingleArgument<TIndex>("M", 0)),
-N_(OperatorBase::GetSingleArgument<TIndex>("N", 0)) {
+K_(OperatorBase::GetSingleArgument<int64_t>("K", 0)),
+M_(OperatorBase::GetSingleArgument<int64_t>("M", 0)),
+N_(OperatorBase::GetSingleArgument<int64_t>("N", 0)) {
 CAFFE_ENFORCE(OperatorBase::HasArgument("K"), "Argument `K` is missing.");
 CAFFE_ENFORCE(OperatorBase::HasArgument("M"), "Argument `M` is missing.");
 CAFFE_ENFORCE(OperatorBase::HasArgument("N"), "Argument `N` is missing.");
@@ -44,8 +44,8 @@ class TTContractionOp final : public Operator<Context> {
 
 CAFFE_ENFORCE(A.ndim() == 2, A.ndim());
 
-TIndex A_size = A.size_from_dim(0);
-TIndex B_size = B.size_from_dim(0);
+int64_t A_size = A.size_from_dim(0);
+int64_t B_size = B.size_from_dim(0);
 
 CAFFE_ENFORCE(
 K_ * M_ == A_size,
@@ -55,19 +55,19 @@ class TTContractionOp final : public Operator<Context> {
 B_size % (K_ * N_) == 0,
 "Argument `K` and `N` do not agree with the size of B.");
 
-TIndex D_ = B_size / (K_ * N_);
+int64_t D_ = B_size / (K_ * N_);
 
-TIndex C_size = D_ * M_ * N_;
-C->Resize(vector<TIndex>{C_size});
+int64_t C_size = D_ * M_ * N_;
+C->Resize(vector<int64_t>{C_size});
 
-TIndex B_stride = K_ * N_;
-TIndex C_stride = M_ * N_;
+int64_t B_stride = K_ * N_;
+int64_t C_stride = M_ * N_;
 
 const T* A_data = A.template data<T>();
 const T* B_data = B.template data<T>();
 T* C_data = C->template mutable_data<T>();
 
-for (TIndex B_index = 0; B_index < B_size; B_index += B_stride) {
+for (int64_t B_index = 0; B_index < B_size; B_index += B_stride) {
 math::Gemm<T, Context, Engine>(
 CblasTrans,
 CblasNoTrans,
@@ -84,9 +84,9 @@ class TTContractionOp final : public Operator<Context> {
 }
 
 protected:
-TIndex K_;
-TIndex M_;
-TIndex N_;
+int64_t K_;
+int64_t M_;
+int64_t N_;
 };
 
 template <typename T, class Context, class Engine = DefaultEngine>
@@ -95,9 +95,9 @@ class TTContractionGradientOp final : public Operator<Context> {
 USE_OPERATOR_CONTEXT_FUNCTIONS;
 TTContractionGradientOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
-K_(OperatorBase::GetSingleArgument<TIndex>("K", 0)),
-M_(OperatorBase::GetSingleArgument<TIndex>("M", 0)),
-N_(OperatorBase::GetSingleArgument<TIndex>("N", 0)) {}
+K_(OperatorBase::GetSingleArgument<int64_t>("K", 0)),
+M_(OperatorBase::GetSingleArgument<int64_t>("M", 0)),
+N_(OperatorBase::GetSingleArgument<int64_t>("N", 0)) {}
 
 bool RunOnDevice() override {
 const auto& G = Input(0);
@@ -106,16 +106,16 @@ class TTContractionGradientOp final : public Operator<Context> {
 auto* dA = Output(0);
 auto* dB = Output(1);
 
-TIndex G_size = G.size_from_dim(0);
-TIndex D_ = G_size / (M_ * N_);
+int64_t G_size = G.size_from_dim(0);
+int64_t D_ = G_size / (M_ * N_);
 
-TIndex dB_size = D_ * K_ * N_;
+int64_t dB_size = D_ * K_ * N_;
 
 dA->Resize(A.dims());
 dB->Resize(B.dims());
 
-TIndex B_stride = K_ * N_;
-TIndex G_stride = M_ * N_;
+int64_t B_stride = K_ * N_;
+int64_t G_stride = M_ * N_;
 
 const T* G_data = G.template data<T>();
 const T* A_data = A.template data<T>();
@@ -125,7 +125,7 @@ class TTContractionGradientOp final : public Operator<Context> {
 T* dB_data = dB->template mutable_data<T>();
 
 const T* G_ptr = G_data;
-for (TIndex B_index = 0; B_index < dB_size; B_index += B_stride) {
+for (int64_t B_index = 0; B_index < dB_size; B_index += B_stride) {
 math::Gemm<T, Context, Engine>(
 CblasNoTrans,
 CblasTrans,
@@ -139,7 +139,7 @@ class TTContractionGradientOp final : public Operator<Context> {
 }
 
 G_ptr = G_data;
-for (TIndex B_index = 0; B_index < dB_size; B_index += B_stride) {
+for (int64_t B_index = 0; B_index < dB_size; B_index += B_stride) {
 math::Gemm<T, Context, Engine>(
 CblasNoTrans,
 CblasNoTrans,
@@ -156,9 +156,9 @@ class TTContractionGradientOp final : public Operator<Context> {
 }
 
 protected:
-TIndex K_;
-TIndex M_;
-TIndex N_;
+int64_t K_;
+int64_t M_;
+int64_t N_;
 };
 
 } // namespace caffe2
@@ -29,7 +29,7 @@ class TTPadOp final : public Operator<Context> {
 USE_OPERATOR_CONTEXT_FUNCTIONS;
 TTPadOp(const OperatorDef& operator_def, Workspace* ws)
 : Operator<Context>(operator_def, ws),
-scale_(OperatorBase::GetSingleArgument<TIndex>("scale", 0)) {
+scale_(OperatorBase::GetSingleArgument<int64_t>("scale", 0)) {
 CAFFE_ENFORCE(
 OperatorBase::HasArgument("scale"), "Argument `scale` is missing.");
 }
@@ -46,16 +46,16 @@ class TTPadOp final : public Operator<Context> {
 
 auto* X_orig_dim0 = Output(1);
 X_orig_dim0->Resize(1);
-*X_orig_dim0->template mutable_data<TIndex>() = X_dim0;
+*X_orig_dim0->template mutable_data<int64_t>() = X_dim0;
 
 if (X_dim0 % scale_ != 0) {
-TIndex padded_dim0 = (X_dim0 / scale_ + 1) * scale_;
+int64_t padded_dim0 = (X_dim0 / scale_ + 1) * scale_;
 auto dim0_diff = padded_dim0 - X_dim0;
 // set growthPct to the upper bound percentage: (100 * scale_ / X_dim0)
 X_pad->Extend(dim0_diff, 100 * scale_ / X_dim0, &context_);
 
 auto* X_pad_data = X_pad->template mutable_data<T>();
-TIndex X_size = X_dim0 * X_dim1;
+int64_t X_size = X_dim0 * X_dim1;
 memset(X_pad_data + X_size, 0, dim0_diff * X_dim1 * sizeof(T));
 }
 
@@ -63,7 +63,7 @@ class TTPadOp final : public Operator<Context> {
 }
 
 protected:
-TIndex scale_;
+int64_t scale_;
 };
 
 template <typename T, class Context, class Engine = DefaultEngine>
@@ -78,7 +78,7 @@ class TTPadGradientOp final : public Operator<Context> {
 auto* output = Output(0);
 CAFFE_ENFORCE(&G == output);
 
-auto old_dim0 = *Input(1).template data<TIndex>();
+auto old_dim0 = *Input(1).template data<int64_t>();
 auto new_dim0 = G.dim(0);
 auto dim1 = G.dim(1);
 
@@ -43,7 +43,7 @@ class IDEEPConcatOp final : public IDEEPOperator {
 }
 
 auto axis_vdata = ideep::concat::compute(inputs, axis_, add_axis_, *output);
-axis_info->Resize(vector<TIndex>(1, InputSize()));
+axis_info->Resize(vector<int64_t>(1, InputSize()));
 int* axis_data = axis_info->template mutable_data<int>();
 for (int i = 0; i < axis_vdata.size(); i++) {
 axis_data[i] = axis_vdata[i];

@@ -39,7 +39,7 @@ class IDEEPConvPoolOpBase : public ConvPoolOpBase<IDEEPContext> {
 ideep::tensor::dims output_dims;
 
 auto input_dims = input.get_dims();
-vector<TIndex> input_Tdims (input_dims.begin(), input_dims.end());
+vector<int64_t> input_Tdims (input_dims.begin(), input_dims.end());
 InferOutputSize(
 input_Tdims,
 output_channel,

@@ -35,7 +35,7 @@ class IDEEPSqueezeOp final : public IDEEPOperator {
 (dims_.back() + 1),
 " dimensions.");
 const auto& ideep_dims = X.get_dims();
-vector<TIndex> dims(ideep_dims.begin(), ideep_dims.end());
+vector<int64_t> dims(ideep_dims.begin(), ideep_dims.end());
 const auto& new_dims = SqueezeOp<IDEEPContext>::ComputeDims(dims, dims_);
 itensor::dims new_dims_ideep(new_dims.begin(), new_dims.end());
 if (&X != Y) {
@ -372,14 +372,14 @@ ImageInputOp<Context>::ImageInputOp(
|
|||
randgen_per_thread_.emplace_back(meta_randgen());
|
||||
}
|
||||
prefetched_image_.Resize(
|
||||
TIndex(batch_size_),
|
||||
TIndex(crop_),
|
||||
TIndex(crop_),
|
||||
TIndex(color_ ? 3 : 1));
|
||||
int64_t(batch_size_),
|
||||
int64_t(crop_),
|
||||
int64_t(crop_),
|
||||
int64_t(color_ ? 3 : 1));
|
||||
if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
|
||||
prefetched_label_.Resize(TIndex(batch_size_), TIndex(num_labels_));
|
||||
prefetched_label_.Resize(int64_t(batch_size_), int64_t(num_labels_));
|
||||
} else {
|
||||
prefetched_label_.Resize(vector<TIndex>(1, batch_size_));
|
||||
prefetched_label_.Resize(vector<int64_t>(1, batch_size_));
|
||||
}
|
||||
|
||||
for (int i = 0; i < additional_output_sizes.size(); ++i) {
|
||||
|
|
@ -387,7 +387,7 @@ ImageInputOp<Context>::ImageInputOp(
|
|||
Context::GetDeviceType());
|
||||
prefetched_additional_outputs_.emplace_back(CPU);
|
||||
prefetched_additional_outputs_[i].Resize(
|
||||
TIndex(batch_size_), TIndex(additional_output_sizes[i]));
|
||||
int64_t(batch_size_), int64_t(additional_output_sizes[i]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,10 +23,10 @@ TEST(MKLDNNTest, SimpleConvolutionTest) {
|
|||
int pads[2] = {0, 0};
|
||||
|
||||
// Creating Input and output tensors
|
||||
Tensor X(vector<TIndex>{16, 8, 32, 32}, CPU);
|
||||
Tensor W(vector<TIndex>{64, 8, 3, 3}, CPU);
|
||||
Tensor b(vector<TIndex>{64}, CPU);
|
||||
Tensor Y(vector<TIndex>{16, 64, 30, 30}, CPU);
|
||||
Tensor X(vector<int64_t>{16, 8, 32, 32}, CPU);
|
||||
Tensor W(vector<int64_t>{64, 8, 3, 3}, CPU);
|
||||
Tensor b(vector<int64_t>{64}, CPU);
|
||||
Tensor Y(vector<int64_t>{16, 64, 30, 30}, CPU);
|
||||
|
||||
float* data = X.mutable_data<float>();
|
||||
for (int i = 0; i < X.size(); ++i) {
|
||||
|
|
@ -91,7 +91,7 @@ TEST(MKLDNNTest, MKLMemoryCopyTest) {
|
|||
// the buffer size being empty for both - former in dnnAllocateBuffer and
|
||||
// the latter in dnnConversionExecute (likely due to some difference in
|
||||
// layout?). Test both cases.
|
||||
vector<vector<TIndex>> dims_list{{10, 3, 20, 20}, {0}, {0, 10}};
|
||||
vector<vector<int64_t>> dims_list{{10, 3, 20, 20}, {0}, {0, 10}};
|
||||
for (const auto& dims : dims_list) {
|
||||
auto X_cpu_in = caffe2::make_unique<Tensor>(dims, CPU);
|
||||
CPUContext ctx;
|
||||
|
|
|
|||
|
|
@ -84,8 +84,8 @@ class MKLMemoryDeserializer : public BlobDeserializerBase {
|
|||
"MKLMemory only supports either float or double formats.");
|
||||
CAFFE_ENFORCE(
|
||||
!proto.has_segment(), "MKLMemory does not support segment right now.");
|
||||
vector<TIndex> dims;
|
||||
for (const TIndex d : proto.dims()) {
|
||||
vector<int64_t> dims;
|
||||
for (const int64_t d : proto.dims()) {
|
||||
dims.push_back(d);
|
||||
}
|
||||
// TODO: right now, every time we do a deserializer we create a new MKL
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ class MKLConcatOp final : public MKLOperator<T> {
|
|||
|
||||
private:
|
||||
int axis_;
|
||||
vector<TIndex> cached_output_dims_;
|
||||
vector<int64_t> cached_output_dims_;
|
||||
};
|
||||
|
||||
} // namespace mkl
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
|
|||
math::Set<T, CPUContext>(
|
||||
M, 0.0, cpu_zero_bias.template mutable_data<float>(), &ctx);
|
||||
|
||||
zero_bias_.reset(new MKLMemory<T>(std::vector<TIndex>{M}));
|
||||
zero_bias_.reset(new MKLMemory<T>(std::vector<int64_t>{M}));
|
||||
zero_bias_->CopyFrom(cpu_zero_bias);
|
||||
}
|
||||
const auto& bias = InputSize() == 2
|
||||
|
|
@ -130,11 +130,11 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
|
|||
if (group_ > 1) {
|
||||
// Explicitly reformat the buffer.
|
||||
MKLMemory<float> group_filter(
|
||||
std::vector<TIndex>{TIndex(group_),
|
||||
TIndex(filter.dim32(0) / group_),
|
||||
TIndex(filter.dim32(1)),
|
||||
TIndex(filter.dim32(2)),
|
||||
TIndex(filter.dim32(3))},
|
||||
std::vector<int64_t>{int64_t(group_),
|
||||
int64_t(filter.dim32(0) / group_),
|
||||
int64_t(filter.dim32(1)),
|
||||
int64_t(filter.dim32(2)),
|
||||
int64_t(filter.dim32(3))},
|
||||
nullptr,
|
||||
dnnResourceFilter,
|
||||
/*share_memory_if_possible=*/true);
|
||||
|
|
@ -168,8 +168,8 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
|
|||
// Input: X, W, b
|
||||
// Output: Y
|
||||
std::unique_ptr<MKLMemory<T>> zero_bias_;
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<TIndex> cached_filter_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
vector<int64_t> cached_filter_dims_;
|
||||
PrimitiveWrapper<T> primitive_;
|
||||
LayoutWrapper<T> input_layout_;
|
||||
LayoutWrapper<T> filter_layout_;
|
||||
|
|
|
|||
|
|
@ -106,8 +106,8 @@ class ConvMKLDNNOp final : public ConvPoolOpBase<CPUContext> {
|
|||
private:
|
||||
// Input: X, W, b
|
||||
// Output: Y
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<TIndex> cached_filter_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
vector<int64_t> cached_filter_dims_;
|
||||
PrimitiveWrapper<T> primitive_;
|
||||
unique_ptr<MKLMemory<T>> X_wrapper_ = nullptr;
|
||||
unique_ptr<MKLMemory<T>> filter_wrapper_ = nullptr;
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ class MKLSumOp final : public MKLOperator<T> {
|
|||
|
||||
private:
|
||||
std::vector<float> coefficients_;
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
vector<std::shared_ptr<void>> input_views_;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -90,8 +90,8 @@ class MKLFullyConnectedOp final : public MKLOperator<T> {
|
|||
// Input: X, W, b
|
||||
// Output: Y
|
||||
size_t axis_{1};
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<TIndex> cached_filter_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
vector<int64_t> cached_filter_dims_;
|
||||
PrimitiveWrapper<T> primitive_;
|
||||
LayoutWrapper<T> input_layout_;
|
||||
LayoutWrapper<T> filter_layout_;
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class MKLLRNOp final : public LRNOpBase<T, MKLContext> {
|
|||
bool RunOnDeviceWithOrderNHWC() override;
|
||||
|
||||
private:
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
LayoutWrapper<T> workspace_layout_;
|
||||
std::unique_ptr<MKLWorkspace<T>> workspace_buffer_;
|
||||
PrimitiveWrapper<T> primitive_;
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ class PackedFCOp final : public Operator<CPUContext> {
|
|||
}
|
||||
size_t axis_{1};
|
||||
uint32_t hash_{0};
|
||||
vector<TIndex> Y_shape_cache_;
|
||||
vector<int64_t> Y_shape_cache_;
|
||||
Tensor bias_multiplier_{CPU};
|
||||
std::unique_ptr<MKLPackedMatrix> local_packed_matrix_;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -41,8 +41,8 @@ class MKLPoolOp final : public ConvPoolOpBase<MKLContext> {
|
|||
// Input: X
|
||||
// Output: Y
|
||||
private:
|
||||
vector<TIndex> cached_input_dims_;
|
||||
// vector<TIndex> cached_avgpool_input_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
// vector<int64_t> cached_avgpool_input_dims_;
|
||||
LayoutWrapper<T> workspace_layout_;
|
||||
std::unique_ptr<MKLWorkspace<T>> workspace_buffer_;
|
||||
PrimitiveWrapper<T> primitive_;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class MKLReluOp : public MKLOperator<T> {
|
|||
}
|
||||
|
||||
private:
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class MKLBNOp final : public Operator<MKLContext> {
|
|||
const StorageOrder order_;
|
||||
const int num_batches_;
|
||||
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
LayoutWrapper<T> scale_bias_layout_;
|
||||
LayoutWrapper<T> saved_mean_layout_;
|
||||
LayoutWrapper<T> saved_var_layout_;
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ class MKLSqueezeOp final : public MKLOperator<T> {
|
|||
|
||||
private:
|
||||
vector<int> dims_;
|
||||
vector<TIndex> cached_input_dims_;
|
||||
vector<int64_t> cached_input_dims_;
|
||||
};
|
||||
|
||||
} // namespace mkl
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ CAFFE_KNOWN_TYPE(mkl::MKLMemory<float>);
|
|||
CAFFE_KNOWN_TYPE(mkl::MKLMemory<double>);
|
||||
|
||||
template <typename T>
|
||||
static vector<TIndex> GetMKLTensorInfo(
|
||||
static vector<int64_t> GetMKLTensorInfo(
|
||||
const void* c,
|
||||
size_t* capacity,
|
||||
DeviceOption* device) {
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#include "caffe2/core/flags.h" // for TIndex
|
||||
#include "caffe2/core/tensor.h" // for TIndex
|
||||
#include "caffe2/core/flags.h" // for int64_t
|
||||
#include "caffe2/core/tensor.h" // for int64_t
|
||||
#include "caffe2/mkl/utils/mkl_dnn_cppwrapper.h"
|
||||
|
||||
// A global boolean variable that controls the behavior when we call View() on
|
||||
|
|
@ -270,7 +270,7 @@ class MKLMemory {
|
|||
"Reshape is not allowed for custom layouts. "
|
||||
"Convert to plain layout before invoking Reshape().");
|
||||
|
||||
TIndex new_size = 1;
|
||||
int64_t new_size = 1;
|
||||
for (auto i = 0; i < dims.size(); ++i) {
|
||||
CAFFE_ENFORCE_GE_WITH_CALLER(dims[i], 0);
|
||||
new_size *= dims[i];
|
||||
|
|
@ -279,7 +279,7 @@ class MKLMemory {
|
|||
new_size == size_,
|
||||
"New size and old size are not equal. Reshape is not possible.");
|
||||
|
||||
vector<TIndex> new_dims(dims.size());
|
||||
vector<int64_t> new_dims(dims.size());
|
||||
vector<size_t> size(dims.size());
|
||||
vector<size_t> strides(dims.size());
|
||||
for (int i = 0; i < dims.size(); ++i) {
|
||||
|
|
@ -456,7 +456,7 @@ class MKLMemory {
|
|||
return buffer_.get();
|
||||
}
|
||||
|
||||
inline const vector<TIndex>& dims() const {
|
||||
inline const vector<int64_t>& dims() const {
|
||||
return dims_;
|
||||
}
|
||||
|
||||
|
|
@ -470,7 +470,7 @@ class MKLMemory {
|
|||
/**
|
||||
* Returns the size (i.e., the number of items) in the buffer.
|
||||
*/
|
||||
inline TIndex size() const {
|
||||
inline int64_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
|
|
@ -479,7 +479,7 @@ class MKLMemory {
|
|||
* must be between 0 (inclusive) and the number of dimensions, otherwise
|
||||
* this function will produce a fatal message.
|
||||
*/
|
||||
inline TIndex dim(const int i) const {
|
||||
inline int64_t dim(const int i) const {
|
||||
return dims_.at(i);
|
||||
}
|
||||
|
||||
|
|
@ -545,9 +545,9 @@ class MKLMemory {
|
|||
mutable std::mutex buffer_lock_;
|
||||
// The dimensions in the same order as Caffe2 does. This is used to
|
||||
// interface with C2.
|
||||
vector<TIndex> dims_;
|
||||
vector<int64_t> dims_;
|
||||
// Number of items in the buffer.
|
||||
TIndex size_ = -1;
|
||||
int64_t size_ = -1;
|
||||
// The user dnn layout.
|
||||
LayoutWrapper<T> user_layout_;
|
||||
// The internal dnn layout.
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ class MKLOperator : public OperatorBase {
|
|||
// The primitive used in the operator.
|
||||
PrimitiveWrapper<T> primitive_;
|
||||
// Size cache for all the input sizes.
|
||||
vector<vector<TIndex>> input_size_cache_;
|
||||
vector<vector<int64_t>> input_size_cache_;
|
||||
// An internal MKLMemory buffer. This is usually handy when we have a
|
||||
// single output from the operator. If your operator has multiple outputs
|
||||
// then you should allocate your own buffer.
|
||||
|
|
|
|||
|
|
@ -249,7 +249,7 @@ public:
|
|||
|
||||
const int32_t ndim() const { return dims_.size(); }
|
||||
|
||||
vector<TIndex> dims() const { return dims_; }
|
||||
vector<int64_t> dims() const { return dims_; }
|
||||
|
||||
const int32_t dim32(const int index) const { return dims_.at(index); }
|
||||
|
||||
|
|
@ -283,7 +283,7 @@ private:
|
|||
bool SetDims(const vector<TI> &src) {
|
||||
auto old_size = size_;
|
||||
dims_.resize(src.size());
|
||||
TIndex new_size = 1;
|
||||
int64_t new_size = 1;
|
||||
for (unsigned int i = 0; i < src.size(); ++i) {
|
||||
new_size *= src[i];
|
||||
dims_[i] = src[i];
|
||||
|
|
@ -299,7 +299,7 @@ private:
|
|||
return size_ > old_size;
|
||||
}
|
||||
|
||||
bool SetDims(const TIndex d0) {
|
||||
bool SetDims(const int64_t d0) {
|
||||
auto old_size = size_;
|
||||
dims_.resize(1);
|
||||
dims_[0] = d0;
|
||||
|
|
@ -307,7 +307,7 @@ private:
|
|||
return size_ > old_size;
|
||||
}
|
||||
|
||||
bool SetDims(const TIndex d0, const TIndex d1) {
|
||||
bool SetDims(const int64_t d0, const int64_t d1) {
|
||||
auto old_size = size_;
|
||||
dims_.resize(2);
|
||||
dims_[0] = d0;
|
||||
|
|
@ -316,7 +316,7 @@ private:
|
|||
return size_ > old_size;
|
||||
}
|
||||
|
||||
bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) {
|
||||
bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) {
|
||||
auto old_size = size_;
|
||||
dims_.resize(3);
|
||||
dims_[0] = d0;
|
||||
|
|
@ -326,8 +326,8 @@ private:
|
|||
return size_ > old_size;
|
||||
}
|
||||
|
||||
bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2,
|
||||
const TIndex d3) {
|
||||
bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2,
|
||||
const int64_t d3) {
|
||||
auto old_size = size_;
|
||||
dims_.resize(4);
|
||||
dims_[0] = d0;
|
||||
|
|
@ -338,8 +338,8 @@ private:
|
|||
return size_ > old_size;
|
||||
}
|
||||
|
||||
vector<TIndex> dims_;
|
||||
TIndex size_ = -1;
|
||||
vector<int64_t> dims_;
|
||||
int64_t size_ = -1;
|
||||
arm_compute::TensorShape shape_;
|
||||
unique_ptr<arm_compute::GCTensor> tensor_;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ bool GLFullyConnectedOp<T>::RunOnDevice() {
|
|||
CAFFE_ENFORCE_EQ(1, B_->ndim());
|
||||
CAFFE_ENFORCE_EQ(N, B_->dim32(0));
|
||||
|
||||
vector<TIndex> output_dims = {M, N};
|
||||
vector<int64_t> output_dims = {M, N};
|
||||
GLTensor<T> *Y =
|
||||
OperatorBase::Outputs()[0]->template GetMutable<GLTensor<T>>();
|
||||
if (first_run_) {
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ bool GLAveragePoolOp<DataType>::RunOnDeviceWithOrderNCHW() {
|
|||
int height = X_->dim32(2);
|
||||
int width = X_->dim32(3);
|
||||
|
||||
vector<TIndex> output_dims = {N, channels, 1, 1};
|
||||
vector<int64_t> output_dims = {N, channels, 1, 1};
|
||||
if (!global_pooling_) {
|
||||
output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1;
|
||||
output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1;
|
||||
|
|
@ -116,7 +116,7 @@ template <> bool GLMaxPoolOp<DataType>::RunOnDeviceWithOrderNCHW() {
|
|||
int height = X_->dim32(2);
|
||||
int width = X_->dim32(3);
|
||||
|
||||
vector<TIndex> output_dims = {N, channels, 1, 1};
|
||||
vector<int64_t> output_dims = {N, channels, 1, 1};
|
||||
if (!global_pooling_) {
|
||||
output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1;
|
||||
output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1;
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ bool GLResizeNearestOp<T>::RunOnDevice() {
|
|||
|
||||
GLTensor<T> *Y =
|
||||
OperatorBase::Outputs()[0]->template GetMutable<GLTensor<T>>();
|
||||
vector<TIndex> output_dims = {N, C, H * height_scale_, W * width_scale_};
|
||||
vector<int64_t> output_dims = {N, C, H * height_scale_, W * width_scale_};
|
||||
|
||||
if (first_run_) {
|
||||
Y->Resize(output_dims);
|
||||
|
|
|
|||
|
|
@ -329,7 +329,7 @@ class CopyToMPSCNNOp final : public Operator<CPUContext> {
|
|||
for (auto i = 0; i < Inputs().size(); ++i) {
|
||||
const auto& X = Input(i);
|
||||
CAFFE_ENFORCE(X.ndim() > 0 && X.ndim() <= 4);
|
||||
std::vector<TIndex> XDims = {1, 1, 1, 1};
|
||||
std::vector<int64_t> XDims = {1, 1, 1, 1};
|
||||
XDims.assign(X.dims().begin(), X.dims().end());
|
||||
|
||||
caffe2::Timer t;
|
||||
|
|
@ -2259,15 +2259,15 @@ class MPSCNNGenerateProposalsCPPOp final : public Operator<CPUContext> {
|
|||
|
||||
// bbox_deltas: (num_images, A * 4, H, W)
|
||||
CAFFE_ENFORCE_EQ(
|
||||
bbox_deltas.dims(), (vector<TIndex>{num_images, 4 * A, height, width}));
|
||||
bbox_deltas.dims(), (vector<int64_t>{num_images, 4 * A, height, width}));
|
||||
|
||||
// im_info_tensor: (num_images, 3), format [height, width, scale; ...]
|
||||
CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector<TIndex>{num_images, 3}));
|
||||
CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector<int64_t>{num_images, 3}));
|
||||
CAFFE_ENFORCE(
|
||||
im_info_tensor.template IsType<float>(), im_info_tensor.meta().name());
|
||||
|
||||
// anchors: (A, 4)
|
||||
CAFFE_ENFORCE_EQ(anchors.dims(), (vector<TIndex>{A, 4}));
|
||||
CAFFE_ENFORCE_EQ(anchors.dims(), (vector<int64_t>{A, 4}));
|
||||
CAFFE_ENFORCE(anchors.template IsType<float>(), anchors.meta().name());
|
||||
// Broadcast the anchors to all pixels
|
||||
auto all_anchors_vec =
|
||||
|
|
|
|||
|
|
@ -640,7 +640,7 @@ void testMPSCNN() {
|
|||
CAFFE_ENFORCE_EQ(t1.ndim(), 2);
|
||||
CAFFE_ENFORCE(t2.dim32(2) == 1 && t2.dim32(3) == 1);
|
||||
const_cast<TensorCPU&>(t2).Reshape(
|
||||
std::vector<TIndex>{TIndex(batchSize), TIndex(COut)});
|
||||
std::vector<int64_t>{int64_t(batchSize), int64_t(COut)});
|
||||
// Note dims do not match, as Metal leaves a 1x1 spatial
|
||||
// dimension.
|
||||
CAFFE_ENFORCE_EQ(t1.dims(), t2.dims());
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ namespace caffe2 {
|
|||
|
||||
namespace {
|
||||
|
||||
void AddNoiseInput(const vector<TIndex>& shape, const string& name, Workspace* ws) {
|
||||
void AddNoiseInput(const vector<int64_t>& shape, const string& name, Workspace* ws) {
|
||||
DeviceOption option;
|
||||
CPUContext context(option);
|
||||
Blob* blob = ws->CreateBlob(name);
|
||||
|
|
@ -58,7 +58,7 @@ void compareMaxPooling(int N,
|
|||
def1.add_arg()->CopyFrom(MakeArgument("pad_b", padB));
|
||||
def1.add_arg()->CopyFrom(MakeArgument("pad_r", padR));
|
||||
|
||||
AddNoiseInput(vector<TIndex>{N, C, H, W}, "X", &ws);
|
||||
AddNoiseInput(vector<int64_t>{N, C, H, W}, "X", &ws);
|
||||
|
||||
unique_ptr<OperatorBase> op1(CreateOperator(def1, &ws));
|
||||
EXPECT_NE(nullptr, op1.get());
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ namespace caffe2 {
|
|||
|
||||
namespace {
|
||||
|
||||
void AddNoiseInput(const vector<TIndex>& shape, const string& name, Workspace* ws) {
|
||||
void AddNoiseInput(const vector<int64_t>& shape, const string& name, Workspace* ws) {
|
||||
DeviceOption option;
|
||||
CPUContext context(option);
|
||||
Blob* blob = ws->CreateBlob(name);
|
||||
|
|
@ -44,7 +44,7 @@ void compareResizeNeareast(int N,
|
|||
def1.add_arg()->CopyFrom(MakeArgument("width_scale", wscale));
|
||||
def1.add_arg()->CopyFrom(MakeArgument("height_scale", hscale));
|
||||
|
||||
AddNoiseInput(vector<TIndex>{N, C, H, W}, "X", &ws);
|
||||
AddNoiseInput(vector<int64_t>{N, C, H, W}, "X", &ws);
|
||||
|
||||
unique_ptr<OperatorBase> op1(CreateOperator(def1, &ws));
|
||||
EXPECT_NE(nullptr, op1.get());
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
void AddNoiseInput(const std::vector<caffe2::TIndex>& shape,
|
||||
void AddNoiseInput(const std::vector<int64_t>& shape,
|
||||
const std::string& name,
|
||||
caffe2::Workspace* ws) {
|
||||
caffe2::CPUContext context;
|
||||
|
|
@ -60,13 +60,13 @@ double BenchOp(const std::string& typ,
|
|||
def1.add_arg()->CopyFrom(caffe2::MakeArgument("pad_r", 0));
|
||||
def1.add_arg()->CopyFrom(caffe2::MakeArgument("convolution_transform_strategy", std::string("PRECOMPUTE")));
|
||||
|
||||
AddNoiseInput(std::vector<caffe2::TIndex>{1, inputC, inH, inW}, "X", ws);
|
||||
AddNoiseInput(std::vector<int64_t>{1, inputC, inH, inW}, "X", ws);
|
||||
if (transposed) {
|
||||
AddNoiseInput(std::vector<caffe2::TIndex>{inputC, outputC, kH, kW}, "W", ws);
|
||||
AddNoiseInput(std::vector<int64_t>{inputC, outputC, kH, kW}, "W", ws);
|
||||
} else {
|
||||
AddNoiseInput(std::vector<caffe2::TIndex>{outputC, inputC, kH, kW}, "W", ws);
|
||||
AddNoiseInput(std::vector<int64_t>{outputC, inputC, kH, kW}, "W", ws);
|
||||
}
|
||||
AddNoiseInput(std::vector<caffe2::TIndex>{outputC}, "B", ws);
|
||||
AddNoiseInput(std::vector<int64_t>{outputC}, "B", ws);
|
||||
|
||||
std::unique_ptr<caffe2::OperatorBase> op1(CreateOperator(def1, ws));
|
||||
|
||||
|
|
@ -131,19 +131,19 @@ static double BenchGLConvolution(int input_channels,
|
|||
}
|
||||
|
||||
AddNoiseInput(
|
||||
std::vector<caffe2::TIndex>{1, input_channels, input_height, input_width}, "X_cpu", ws);
|
||||
std::vector<int64_t>{1, input_channels, input_height, input_width}, "X_cpu", ws);
|
||||
if (transposed) {
|
||||
AddNoiseInput(
|
||||
std::vector<caffe2::TIndex>{input_channels, output_channels, kernel_height, kernel_width},
|
||||
std::vector<int64_t>{input_channels, output_channels, kernel_height, kernel_width},
|
||||
"W",
|
||||
ws);
|
||||
} else {
|
||||
AddNoiseInput(
|
||||
std::vector<caffe2::TIndex>{output_channels, input_channels, kernel_height, kernel_width},
|
||||
std::vector<int64_t>{output_channels, input_channels, kernel_height, kernel_width},
|
||||
"W",
|
||||
ws);
|
||||
}
|
||||
AddNoiseInput(std::vector<caffe2::TIndex>{output_channels}, "b", ws);
|
||||
AddNoiseInput(std::vector<int64_t>{output_channels}, "b", ws);
|
||||
|
||||
caffe2::NetDef netdef;
|
||||
{
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@
|
|||
|
||||
namespace caffe2 {
|
||||
|
||||
void AddConstInput(const vector<TIndex>& shape,
|
||||
void AddConstInput(const vector<int64_t>& shape,
|
||||
const float value,
|
||||
const string& name,
|
||||
Workspace* ws) {
|
||||
|
|
@ -50,7 +50,7 @@ void AddConstInput(const vector<TIndex>& shape,
|
|||
&context);
|
||||
}
|
||||
|
||||
void AddNoiseInput(const vector<TIndex>& shape,
|
||||
void AddNoiseInput(const vector<int64_t>& shape,
|
||||
const string& name,
|
||||
Workspace* ws) {
|
||||
DeviceOption option;
|
||||
|
|
@ -72,7 +72,7 @@ float snpe_run(int iters, Workspace& ws) {
|
|||
const int W = 227;
|
||||
const int C = 3;
|
||||
|
||||
POPULATE_DATA("X_snpe", (caffe2::vector<caffe2::TIndex>{H, W, C}), hwc);
|
||||
POPULATE_DATA("X_snpe", (caffe2::vector<int64_t>{H, W, C}), hwc);
|
||||
|
||||
OperatorDef def;
|
||||
def.set_name("snpe_test");
|
||||
|
|
@ -108,7 +108,7 @@ float caffe2_run(int iters, Workspace& ws) {
|
|||
ReadProtoFromBinaryFile("/data/local/tmp/squeeze_init_net.pb", &init_net);
|
||||
ReadProtoFromBinaryFile("/data/local/tmp/squeeze_predict_net.pb", &predict_net);
|
||||
ws.RunNetOnce(init_net);
|
||||
POPULATE_DATA("data", (caffe2::vector<caffe2::TIndex>{N, C, H, W}), chw);
|
||||
POPULATE_DATA("data", (caffe2::vector<int64_t>{N, C, H, W}), chw);
|
||||
predict_net.set_name("SqueezeNet");
|
||||
ws.CreateNet(predict_net);
|
||||
|
||||
|
|
|
|||
|
|
@ -538,7 +538,7 @@ void run2b1bConvIm2ColGEMM(QConvState* state,
|
|||
CAFFE_ENFORCE_EQ(Y->dim32(0), divRoundUp(X.dim32(0) * OH * OW, kGEMMTileSize) * kGEMMTileSize);
|
||||
CAFFE_ENFORCE_EQ(Y->dim32(1), OC);
|
||||
Y->ShrinkTo(X.dim32(0) * OH * OW);
|
||||
Y->Reshape(std::vector<TIndex>{{TIndex(X.dim(0)), TIndex(OH), TIndex(OW), TIndex(OC)}});
|
||||
Y->Reshape(std::vector<int64_t>{{int64_t(X.dim(0)), int64_t(OH), int64_t(OW), int64_t(OC)}});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ int randInt(int a, int b) {
|
|||
return std::uniform_int_distribution<int>(a, b)(gen);
|
||||
}
|
||||
|
||||
TensorCPU genTensor11(std::vector<TIndex> shape) {
|
||||
TensorCPU genTensor11(std::vector<int64_t> shape) {
|
||||
Tensor r(CPU);
|
||||
r.Resize(shape);
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ TensorCPU genTensor11(std::vector<TIndex> shape) {
|
|||
return r;
|
||||
}
|
||||
|
||||
TensorCPU genTensorUniform11(std::vector<TIndex> shape) {
|
||||
TensorCPU genTensorUniform11(std::vector<int64_t> shape) {
|
||||
Tensor r(CPU);
|
||||
r.Resize(shape);
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ TensorCPU genTensorUniform11(std::vector<TIndex> shape) {
|
|||
return r;
|
||||
}
|
||||
|
||||
TensorCPU genTensor0123(std::vector<TIndex> shape) {
|
||||
TensorCPU genTensor0123(std::vector<int64_t> shape) {
|
||||
Tensor r(CPU);
|
||||
r.Resize(shape);
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ inline void qgemmNT(int M, int N, int K, const uint8_t* A, const uint8_t* B, flo
|
|||
}
|
||||
}
|
||||
|
||||
void gemmTest(TIndex M, TIndex N, TIndex K) {
|
||||
void gemmTest(int64_t M, int64_t N, int64_t K) {
|
||||
auto X = genTensor11({M, K});
|
||||
auto W = genTensor11({N, K});
|
||||
Tensor XQ(CPU), WQ(CPU), YQ(CPU), Y(CPU);
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ class MPIAllgatherOp final : public Operator<Context> {
|
|||
MPI_Comm comm = OperatorBase::Input<MPICommonWorldWrapper>(0).comm();
|
||||
auto& input = Input(1);
|
||||
auto* output = Output(0);
|
||||
vector<TIndex> output_dims = input.dims();
|
||||
vector<int64_t> output_dims = input.dims();
|
||||
output_dims[0] *= OperatorBase::Input<MPICommonWorldWrapper>(0).size();
|
||||
output->Resize(output_dims);
|
||||
MPI_CHECK(MPI_Allgather(
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ bool AccuracyOp<float, CPUContext>::RunOnDevice() {
|
|||
int D = X.dim32(1);
|
||||
CAFFE_ENFORCE_EQ(label.ndim(), 1);
|
||||
CAFFE_ENFORCE_EQ(label.dim32(0), N);
|
||||
Y->Resize(vector<TIndex>());
|
||||
Y->Resize(vector<int64_t>());
|
||||
const auto* Xdata = X.data<float>();
|
||||
const auto* labelData = label.data<int>();
|
||||
const int top_k = top_k_;
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ bool AccuracyOp<float, CUDAContext>::RunOnDevice() {
|
|||
int D = X.dim32(1);
|
||||
CAFFE_ENFORCE_EQ(label.ndim(), 1);
|
||||
CAFFE_ENFORCE_EQ(label.dim32(0), N);
|
||||
Y->Resize(vector<TIndex>());
|
||||
Y->Resize(vector<int64_t>());
|
||||
float* Ydata = Y->template mutable_data<float>();
|
||||
math::Set<float, CUDAContext>(1, 0, Ydata, &context_);
|
||||
AccuracyKernel<<<
|
||||
|
|
|
|||
|
|
@ -15,14 +15,14 @@ void ComputeArgImpl(
|
|||
const int n,
|
||||
const Compare& comp,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
Context* context) {
|
||||
math::Set<TIndex, Context>(prev_size * next_size, TIndex(0), Y, context);
|
||||
math::Set<int64_t, Context>(prev_size * next_size, int64_t(0), Y, context);
|
||||
for (int i = 0; i < prev_size; ++i) {
|
||||
const T* cur_X = X + i * n * next_size + next_size;
|
||||
for (int k = 1; k < n; ++k) {
|
||||
for (int j = 0; j < next_size; ++j) {
|
||||
TIndex* cur_Y = Y + i * next_size + j;
|
||||
int64_t* cur_Y = Y + i * next_size + j;
|
||||
if (comp(*cur_X, X[i * n * next_size + *cur_Y * next_size + j])) {
|
||||
*cur_Y = k;
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ bool ArgMaxReducer<CPUContext>::operator()(
|
|||
const int next_size,
|
||||
const int n,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
CPUContext* context) const {
|
||||
ComputeArgImpl(prev_size, next_size, n, std::greater<T>(), X, Y, context);
|
||||
return true;
|
||||
|
|
@ -54,7 +54,7 @@ bool ArgMinReducer<CPUContext>::operator()(
|
|||
const int next_size,
|
||||
const int n,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
CPUContext* context) const {
|
||||
ComputeArgImpl(prev_size, next_size, n, std::less<T>(), X, Y, context);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ __global__ void ComputeArgCUDAKernel(
|
|||
const Reducer reducer,
|
||||
const T init,
|
||||
const T* X,
|
||||
TIndex* Y) {
|
||||
int64_t* Y) {
|
||||
__shared__ typename BlockReduce<int, T>::TempStorage temp_storage;
|
||||
const int d = stride.d();
|
||||
for (int idx = blockIdx.x; idx < outer_size; idx += gridDim.x) {
|
||||
|
|
@ -41,7 +41,7 @@ __global__ void ComputeArgCUDAKernel(
|
|||
}
|
||||
kv = BlockReduce<int, T>(temp_storage).Reduce(kv, reducer);
|
||||
if (threadIdx.x == 0) {
|
||||
Y[idx] = static_cast<TIndex>(kv.key);
|
||||
Y[idx] = static_cast<int64_t>(kv.key);
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
|
@ -56,7 +56,7 @@ bool ArgMaxReducer<CUDAContext>::operator()(
|
|||
const int next_size,
|
||||
const int n,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
CUDAContext* context) const {
|
||||
const int outer_size = prev_size * next_size;
|
||||
const FixedDivisor<int> stride(next_size);
|
||||
|
|
@ -82,7 +82,7 @@ bool ArgMinReducer<CUDAContext>::operator()(
|
|||
const int next_size,
|
||||
const int n,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
CUDAContext* context) const {
|
||||
const int outer_size = prev_size * next_size;
|
||||
const FixedDivisor<int> stride(next_size);
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class ArgOp final : public Operator<Context> {
|
|||
next_size,
|
||||
n,
|
||||
X.template data<T>(),
|
||||
Y->template mutable_data<TIndex>(),
|
||||
Y->template mutable_data<int64_t>(),
|
||||
&context_);
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ struct ArgMaxReducer {
|
|||
const int next_size,
|
||||
const int n,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
Context* context) const;
|
||||
};
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ struct ArgMinReducer {
|
|||
const int next_size,
|
||||
const int n,
|
||||
const T* X,
|
||||
TIndex* Y,
|
||||
int64_t* Y,
|
||||
Context* context) const;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class AssertOp final : public Operator<Context> {
|
|||
cmp_tensor_.CopyFrom(Input(0));
|
||||
auto* cmp_data = cmp_tensor_.template data<T>();
|
||||
|
||||
for (TIndex i = 0; i < cmp_tensor_.size(); ++i) {
|
||||
for (int64_t i = 0; i < cmp_tensor_.size(); ++i) {
|
||||
CAFFE_ENFORCE((bool)cmp_data[i], [&]() {
|
||||
std::stringstream ss;
|
||||
ss << "Assert failed for element " << i
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ class AtomicFetchAddOp final : public Operator<CPUContext> {
|
|||
auto& b = Input(2);
|
||||
auto* c = Output(0);
|
||||
auto* d = Output(1);
|
||||
c->Resize(std::vector<TIndex>());
|
||||
d->Resize(std::vector<TIndex>());
|
||||
c->Resize(std::vector<int64_t>());
|
||||
d->Resize(std::vector<int64_t>());
|
||||
auto* aPtr = a.data<int32_t>();
|
||||
auto* bPtr = b.data<int32_t>();
|
||||
auto* cPtr = c->template mutable_data<int32_t>();
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
|
|||
zeros_.clear();
|
||||
nonzeros_.reserve(D);
|
||||
zeros_.reserve(D);
|
||||
for (TIndex j = 0; j < D; j++) {
|
||||
for (int64_t j = 0; j < D; j++) {
|
||||
if (lambda1_ptr[j] == 0) {
|
||||
zeros_.push_back(j);
|
||||
} else {
|
||||
|
|
@ -121,7 +121,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
|
|||
// rows by replicating the input parameters K times. Then finish row-by-row.
|
||||
TypedCachedBuffers<T>& b = GetBuffers<T>();
|
||||
if (nonzeros_.size() == D) {
|
||||
TIndex i = 0;
|
||||
int64_t i = 0;
|
||||
if (K > 1) {
|
||||
TileArrayIntoVector(lambda1_ptr, D, K, &b.lambda1_);
|
||||
TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_);
|
||||
|
|
@ -142,7 +142,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
|
|||
D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
|
||||
}
|
||||
} else if (zeros_.size() == D) {
|
||||
TIndex i = 0;
|
||||
int64_t i = 0;
|
||||
if (K > 1) {
|
||||
TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_z_);
|
||||
DCHECK_EQ(K * D, b.lambda2_z_.size());
|
||||
|
|
@ -169,7 +169,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
|
|||
PackV(nonzeros_.size(), lambda2_ptr, nonzeros_.data(), b.lambda2_.data());
|
||||
PackV(zeros_.size(), lambda2_ptr, zeros_.data(), b.lambda2_z_.data());
|
||||
|
||||
TIndex i = 0;
|
||||
int64_t i = 0;
|
||||
b.accumulator_.resize(std::max(nonzeros_.size(), zeros_.size()));
|
||||
if (K > 1) {
|
||||
// Truncate to original size, and re-tile with offsets this time.
|
||||
|
|
@ -219,15 +219,15 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
|
|||
template <>
|
||||
template <typename T>
|
||||
void BatchBoxCoxOp<CPUContext>::BoxCoxNaive(
|
||||
TIndex N,
|
||||
TIndex D,
|
||||
int64_t N,
|
||||
int64_t D,
|
||||
const T* data_ptr,
|
||||
const T* lambda1_ptr,
|
||||
const T* lambda2_ptr,
|
||||
T k_eps,
|
||||
T* output_ptr) {
|
||||
for (TIndex i = 0; i < N; i++) {
|
||||
for (TIndex j = 0; j < D; j++, data_ptr++, output_ptr++) {
|
||||
for (int64_t i = 0; i < N; i++) {
|
||||
for (int64_t j = 0; j < D; j++, data_ptr++, output_ptr++) {
|
||||
T lambda1_v = lambda1_ptr[j];
|
||||
T lambda2_v = lambda2_ptr[j];
|
||||
T tmp = std::max(*data_ptr + lambda2_v, k_eps);
|
||||
|
|
@ -245,18 +245,18 @@ void BatchBoxCoxOp<CPUContext>::BoxCoxNaive(
|
|||
template <>
|
||||
template <typename T>
|
||||
void BatchBoxCoxOp<CPUContext>::BoxCoxNonzeroLambda(
|
||||
TIndex D,
|
||||
int64_t D,
|
||||
const T* data_ptr,
|
||||
const T* lambda1,
|
||||
const T* lambda2,
|
||||
T k_eps,
|
||||
T* out) {
|
||||
caffe2::math::Add(D, data_ptr, lambda2, out, &context_);
|
||||
for (TIndex j = 0; j < D; j++) {
|
||||
for (int64_t j = 0; j < D; j++) {
|
||||
out[j] = std::max(out[j], k_eps);
|
||||
}
|
||||
Pow(D, out, lambda1, out);
|
||||
for (TIndex j = 0; j < D; j++) {
|
||||
for (int64_t j = 0; j < D; j++) {
|
||||
out[j] -= 1.0;
|
||||
}
|
||||
caffe2::math::Div(D, out, lambda1, out, &context_);
|
||||
|
|
@ -265,13 +265,13 @@ void BatchBoxCoxOp<CPUContext>::BoxCoxNonzeroLambda(
|
|||
template <>
|
||||
template <typename T>
|
||||
void BatchBoxCoxOp<CPUContext>::BoxCoxZeroLambda(
|
||||
TIndex D,
|
||||
int64_t D,
|
||||
const T* data_ptr,
|
||||
const T* lambda2,
|
||||
T k_eps,
|
||||
T* output_ptr) {
|
||||
caffe2::math::Add(D, data_ptr, lambda2, output_ptr, &context_);
|
||||
for (TIndex j = 0; j < D; j++) {
|
||||
for (int64_t j = 0; j < D; j++) {
|
||||
output_ptr[j] = std::max(output_ptr[j], k_eps);
|
||||
}
|
||||
caffe2::math::Log(D, output_ptr, output_ptr, &context_);
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ class BatchBoxCoxOp final : public Operator<Context> {
|
|||
protected:
|
||||
template <typename T>
|
||||
void BoxCoxNaive(
|
||||
TIndex N,
|
||||
TIndex D,
|
||||
int64_t N,
|
||||
int64_t D,
|
||||
const T* data_ptr,
|
||||
const T* lambda1_ptr,
|
||||
const T* lambda2_ptr,
|
||||
|
|
@ -38,7 +38,7 @@ class BatchBoxCoxOp final : public Operator<Context> {
|
|||
#ifdef CAFFE2_USE_MKL
|
||||
template <typename T>
|
||||
void BoxCoxNonzeroLambda(
|
||||
TIndex D,
|
||||
int64_t D,
|
||||
const T* data_ptr,
|
||||
const T* lambda1,
|
||||
const T* lambda2,
|
||||
|
|
@ -47,7 +47,7 @@ class BatchBoxCoxOp final : public Operator<Context> {
|
|||
|
||||
template <typename T>
|
||||
void BoxCoxZeroLambda(
|
||||
TIndex D,
|
||||
int64_t D,
|
||||
const T* data_ptr,
|
||||
const T* lambda2,
|
||||
T k_eps,
|
||||
|
|
|
|||
|
|
@ -26,21 +26,21 @@ bool BatchBucketizeOp<CPUContext>::RunOnDevice() {
|
|||
auto feature_dim = feature.dim(1);
|
||||
auto output_dim = indices.size();
|
||||
|
||||
TIndex length_sum = 0;
|
||||
for (TIndex i = 0; i < lengths.size(); i++) {
|
||||
int64_t length_sum = 0;
|
||||
for (int64_t i = 0; i < lengths.size(); i++) {
|
||||
CAFFE_ENFORCE_GE(feature_dim, indices_data[i]);
|
||||
length_sum += lengths_data[i];
|
||||
}
|
||||
CAFFE_ENFORCE_EQ(length_sum, boundaries.size());
|
||||
|
||||
TIndex lower_bound = 0;
|
||||
int64_t lower_bound = 0;
|
||||
output->Resize(batch_size, output_dim);
|
||||
auto* output_data = output->template mutable_data<int32_t>();
|
||||
|
||||
for (TIndex i = 0; i < batch_size; i++) {
|
||||
for (int64_t i = 0; i < batch_size; i++) {
|
||||
lower_bound = 0;
|
||||
for (TIndex j = 0; j < output_dim; j++) {
|
||||
for (TIndex k = 0; k <= lengths_data[j]; k++) {
|
||||
for (int64_t j = 0; j < output_dim; j++) {
|
||||
for (int64_t k = 0; k <= lengths_data[j]; k++) {
|
||||
if (k == lengths_data[j] ||
|
||||
feature_data[i * feature_dim + indices_data[j]] <=
|
||||
boundaries_data[lower_bound + k]) {
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ bool BatchGatherOp<CUDAContext>::DoRunWithType() {
|
|||
auto& indices = Input(INDICES);
|
||||
auto* output = Output(0);
|
||||
|
||||
vector<TIndex> shape;
|
||||
vector<int64_t> shape;
|
||||
shape.push_back(data.dim(0));
|
||||
shape.insert(shape.end(), indices.dims().begin(), indices.dims().end());
|
||||
shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end());
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class BatchGatherOp final : public Operator<Context> {
|
|||
|
||||
CAFFE_ENFORCE_GE(data.ndim(), 2, "DATA should be at least 2-D");
|
||||
|
||||
vector<TIndex> shape;
|
||||
vector<int64_t> shape;
|
||||
shape.push_back(data.dim(0));
|
||||
shape.insert(shape.end(), indices.dims().begin(), indices.dims().end());
|
||||
shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end());
|
||||
|
|
|
|||
|
|
@ -27,16 +27,16 @@ vector<TensorShape> TensorInferenceForBatchMatMul(
|
|||
b_dim1 = in[1].dims(ndim - 1);
|
||||
}
|
||||
|
||||
auto output_dims = vector<TIndex>{in[0].dims().begin(), in[0].dims().end()};
|
||||
auto output_dims = vector<int64_t>{in[0].dims().begin(), in[0].dims().end()};
|
||||
output_dims[ndim - 2] = a_dim0;
|
||||
output_dims[ndim - 1] = b_dim1;
|
||||
|
||||
return vector<TensorShape>{
|
||||
CreateTensorShape(vector<TIndex>{output_dims}, in[0].data_type())};
|
||||
CreateTensorShape(vector<int64_t>{output_dims}, in[0].data_type())};
|
||||
} else {
|
||||
auto ndims_A = in[0].dims_size();
|
||||
auto ndims_B = in[1].dims_size();
|
||||
std::vector<TIndex> dims_A(ndims_A), dims_B(ndims_B);
|
||||
std::vector<int64_t> dims_A(ndims_A), dims_B(ndims_B);
|
||||
for (int i = 0; i < ndims_A; ++i) {
|
||||
dims_A[i] = in[0].dims(i);
|
||||
}
|
||||
|
|
@ -66,7 +66,7 @@ vector<TensorShape> TensorInferenceForBatchMatMul(
|
|||
N = dims_B[ndims_B - 1];
|
||||
}
|
||||
|
||||
std::vector<TIndex> new_dims;
|
||||
std::vector<int64_t> new_dims;
|
||||
if (ndims_A >= ndims_B) {
|
||||
new_dims.assign(dims_A.begin(), dims_A.end() - 2);
|
||||
} else {
|
||||
|
|
@ -82,7 +82,7 @@ vector<TensorShape> TensorInferenceForBatchMatMul(
|
|||
new_dims.push_back(1);
|
||||
}
|
||||
return vector<TensorShape>{
|
||||
CreateTensorShape(vector<TIndex>{new_dims}, in[0].data_type())};
|
||||
CreateTensorShape(vector<int64_t>{new_dims}, in[0].data_type())};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ class BatchMatMulOp final : public Operator<Context> {
|
|||
// Calculate output tensor shapes [B..., (M), (N)]
|
||||
// Batch dimensions will be broadcasted out to those of the longer tensor
|
||||
// A or B. Either M or N are optional if A or B, respectively are 1-D.
|
||||
std::vector<TIndex> new_dims;
|
||||
std::vector<int64_t> new_dims;
|
||||
if (ndims_A >= ndims_B) {
|
||||
new_dims.assign(dims_A.begin(), dims_A.end() - 2);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class BatchMatMulOpGPUTest : public testing::Test {
|
|||
}
|
||||
|
||||
void AddConstInput(
|
||||
const std::vector<TIndex>& dims,
|
||||
const std::vector<int64_t>& dims,
|
||||
const float value,
|
||||
const string& name) {
|
||||
Blob* blob = ws_.CreateBlob(name);
|
||||
|
|
@ -39,7 +39,7 @@ class BatchMatMulOpGPUTest : public testing::Test {
|
|||
cuda_context_.get());
|
||||
}
|
||||
|
||||
void VerifyOutput(const std::vector<TIndex>& dims, const float value) const {
|
||||
void VerifyOutput(const std::vector<int64_t>& dims, const float value) const {
|
||||
const Blob* Y_blob = ws_.GetBlob("Y");
|
||||
ASSERT_NE(nullptr, Y_blob);
|
||||
const auto& Y = Y_blob->Get<Tensor>();
|
||||
|
|
@ -64,12 +64,12 @@ TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUNormalTest) {
|
|||
if (!HasCudaGPU()) {
|
||||
return;
|
||||
}
|
||||
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<TIndex>{3, 10, 6}, 1.0f, "B");
|
||||
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<int64_t>{3, 10, 6}, 1.0f, "B");
|
||||
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
|
||||
ASSERT_NE(nullptr, op);
|
||||
ASSERT_TRUE(op->Run());
|
||||
VerifyOutput(std::vector<TIndex>{3, 5, 6}, 10.0f);
|
||||
VerifyOutput(std::vector<int64_t>{3, 5, 6}, 10.0f);
|
||||
}
|
||||
|
||||
TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) {
|
||||
|
|
@ -79,12 +79,12 @@ TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) {
|
|||
auto* arg = def_.add_arg();
|
||||
arg->set_name("broadcast");
|
||||
arg->set_i(1);
|
||||
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<TIndex>{2, 3, 10, 6}, 1.0f, "B");
|
||||
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<int64_t>{2, 3, 10, 6}, 1.0f, "B");
|
||||
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
|
||||
ASSERT_NE(nullptr, op);
|
||||
ASSERT_TRUE(op->Run());
|
||||
VerifyOutput(std::vector<TIndex>{2, 3, 5, 6}, 10.0f);
|
||||
VerifyOutput(std::vector<int64_t>{2, 3, 5, 6}, 10.0f);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ class BatchMatMulOpTest : public testing::Test {
|
|||
}
|
||||
|
||||
void AddConstInput(
|
||||
const std::vector<TIndex>& dims,
|
||||
const std::vector<int64_t>& dims,
|
||||
const float value,
|
||||
const string& name) {
|
||||
Blob* blob = ws_.CreateBlob(name);
|
||||
|
|
@ -33,7 +33,7 @@ class BatchMatMulOpTest : public testing::Test {
|
|||
cpu_context_.get());
|
||||
}
|
||||
|
||||
void VerifyOutput(const std::vector<TIndex>& dims, const float value) const {
|
||||
void VerifyOutput(const std::vector<int64_t>& dims, const float value) const {
|
||||
const Blob* Y_blob = ws_.GetBlob("Y");
|
||||
ASSERT_NE(nullptr, Y_blob);
|
||||
const auto& Y = Y_blob->Get<TensorCPU>();
|
||||
|
|
@ -54,24 +54,24 @@ class BatchMatMulOpTest : public testing::Test {
|
|||
};
|
||||
|
||||
TEST_F(BatchMatMulOpTest, BatchMatMulOpNormalTest) {
|
||||
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<TIndex>{3, 10, 6}, 1.0f, "B");
|
||||
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<int64_t>{3, 10, 6}, 1.0f, "B");
|
||||
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
|
||||
ASSERT_NE(nullptr, op);
|
||||
ASSERT_TRUE(op->Run());
|
||||
VerifyOutput(std::vector<TIndex>{3, 5, 6}, 10.0f);
|
||||
VerifyOutput(std::vector<int64_t>{3, 5, 6}, 10.0f);
|
||||
}
|
||||
|
||||
TEST_F(BatchMatMulOpTest, BatchMatMulOpBroadcastTest) {
|
||||
auto* arg = def_.add_arg();
|
||||
arg->set_name("broadcast");
|
||||
arg->set_i(1);
|
||||
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<TIndex>{2, 3, 10, 6}, 1.0f, "B");
|
||||
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
|
||||
AddConstInput(std::vector<int64_t>{2, 3, 10, 6}, 1.0f, "B");
|
||||
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
|
||||
ASSERT_NE(nullptr, op);
|
||||
ASSERT_TRUE(op->Run());
|
||||
VerifyOutput(std::vector<TIndex>{2, 3, 5, 6}, 10.0f);
|
||||
VerifyOutput(std::vector<int64_t>{2, 3, 5, 6}, 10.0f);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
|||
|
|
@ -14,15 +14,15 @@ bool BatchSparseToDenseOp<T, Context>::RunOnDevice() {
|
|||
CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
|
||||
CAFFE_ENFORCE_EQ(indices.ndim(), 1);
|
||||
|
||||
const TIndex* lengths_data = lengths.template data<TIndex>();
|
||||
const TIndex* indices_data = indices.template data<TIndex>();
|
||||
const int64_t* lengths_data = lengths.template data<int64_t>();
|
||||
const int64_t* indices_data = indices.template data<int64_t>();
|
||||
const T* values_data = values.template data<T>();
|
||||
TIndex batch_size = lengths.size();
|
||||
TIndex lengths_sum = 0;
|
||||
math::Sum<TIndex, Context>(batch_size, lengths_data, &lengths_sum, &context_);
|
||||
int64_t batch_size = lengths.size();
|
||||
int64_t lengths_sum = 0;
|
||||
math::Sum<int64_t, Context>(batch_size, lengths_data, &lengths_sum, &context_);
|
||||
CAFFE_ENFORCE_EQ(lengths_sum, indices.size());
|
||||
|
||||
vector<TIndex> output_shape = {batch_size};
|
||||
vector<int64_t> output_shape = {batch_size};
|
||||
if (InputSize() == 4) {
|
||||
auto& shaper = Input(3);
|
||||
CAFFE_ENFORCE_EQ(shaper.ndim(), 2);
|
||||
|
|
@ -42,9 +42,9 @@ bool BatchSparseToDenseOp<T, Context>::RunOnDevice() {
|
|||
math::Set(
|
||||
output->size(), static_cast<T>(default_value_), output_data, &context_);
|
||||
|
||||
TIndex k = 0;
|
||||
for (TIndex i = 0; i < batch_size; ++i) {
|
||||
for (TIndex j = 0; j < lengths_data[i]; ++j) {
|
||||
int64_t k = 0;
|
||||
for (int64_t i = 0; i < batch_size; ++i) {
|
||||
for (int64_t j = 0; j < lengths_data[i]; ++j) {
|
||||
CAFFE_ENFORCE(
|
||||
indices_data[k] < dense_last_dim_,
|
||||
"An indice (",
|
||||
|
|
@ -69,24 +69,24 @@ bool BatchDenseToSparseOp<T, Context>::RunOnDevice() {
|
|||
CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
|
||||
CAFFE_ENFORCE_EQ(indices.ndim(), 1);
|
||||
CAFFE_ENFORCE_EQ(dense.ndim(), 2);
|
||||
const TIndex* lengths_data = lengths.template data<TIndex>();
|
||||
const TIndex* indices_data = indices.template data<TIndex>();
|
||||
const int64_t* lengths_data = lengths.template data<int64_t>();
|
||||
const int64_t* indices_data = indices.template data<int64_t>();
|
||||
const T* dense_data = dense.template data<T>();
|
||||
|
||||
TIndex batch_size = lengths.size();
|
||||
TIndex lengths_sum = 0;
|
||||
math::Sum<TIndex, Context>(batch_size, lengths_data, &lengths_sum, &context_);
|
||||
int64_t batch_size = lengths.size();
|
||||
int64_t lengths_sum = 0;
|
||||
math::Sum<int64_t, Context>(batch_size, lengths_data, &lengths_sum, &context_);
|
||||
CAFFE_ENFORCE_EQ(lengths_sum, indices.size());
|
||||
|
||||
CAFFE_ENFORCE_EQ(batch_size, dense.dim(0));
|
||||
dense_last_dim_ = dense.dim(1);
|
||||
vector<TIndex> output_shape = indices.dims();
|
||||
vector<int64_t> output_shape = indices.dims();
|
||||
output->Resize(output_shape);
|
||||
T* output_data = output->template mutable_data<T>();
|
||||
|
||||
TIndex k = 0;
|
||||
for (TIndex i = 0; i < batch_size; ++i) {
|
||||
for (TIndex j = 0; j < lengths_data[i]; ++j) {
|
||||
int64_t k = 0;
|
||||
for (int64_t i = 0; i < batch_size; ++i) {
|
||||
for (int64_t j = 0; j < lengths_data[i]; ++j) {
|
||||
CAFFE_ENFORCE(
|
||||
indices_data[k] < dense.dim(1),
|
||||
"An indice (",
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@ class BatchSparseToDenseOp : public Operator<Context> {
|
|||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
BatchSparseToDenseOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws),
|
||||
OP_SINGLE_ARG(TIndex, "dense_last_dim", dense_last_dim_, -1),
|
||||
OP_SINGLE_ARG(int64_t, "dense_last_dim", dense_last_dim_, -1),
|
||||
OP_SINGLE_ARG(T, "default_value", default_value_, static_cast<T>(0)) {}
|
||||
bool RunOnDevice() override;
|
||||
|
||||
private:
|
||||
TIndex dense_last_dim_;
|
||||
int64_t dense_last_dim_;
|
||||
T default_value_;
|
||||
INPUT_TAGS(LENGTHS, INDICES, VALUES);
|
||||
};
|
||||
|
|
@ -34,7 +34,7 @@ class BatchDenseToSparseOp : public Operator<Context> {
|
|||
bool RunOnDevice() override;
|
||||
|
||||
private:
|
||||
TIndex dense_last_dim_;
|
||||
int64_t dense_last_dim_;
|
||||
INPUT_TAGS(LENGTHS, INDICES, DENSE);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
|
|||
}
|
||||
}
|
||||
|
||||
CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector<TIndex>{batch_size, 3}));
|
||||
CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector<int64_t>{batch_size, 3}));
|
||||
Eigen::Map<const ERArrXXf> iminfo(
|
||||
iminfo_in.data<float>(), iminfo_in.dim(0), iminfo_in.dim(1));
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ bool BooleanMaskOp<CPUContext>::RunOnDevice() {
|
|||
++numOutputs;
|
||||
}
|
||||
}
|
||||
std::vector<TIndex> outShape;
|
||||
std::vector<int64_t> outShape;
|
||||
outShape.push_back(numOutputs);
|
||||
outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end());
|
||||
dataOut->Resize(outShape);
|
||||
|
|
@ -81,11 +81,11 @@ bool BooleanMaskOp<CPUContext>::RunOnDevice() {
|
|||
const auto innerSize = data.size_from_dim(1);
|
||||
const auto innerSizeBytes = innerSize * data.meta().itemsize();
|
||||
|
||||
TIndex lastStart = -1;
|
||||
int64_t lastStart = -1;
|
||||
const auto* inPtr = (char*)data.raw_data();
|
||||
TIndex outStart = 0;
|
||||
int64_t outStart = 0;
|
||||
|
||||
for (TIndex i = 0;; ++i) {
|
||||
for (int64_t i = 0;; ++i) {
|
||||
// mask was true and either a) became false, or b) sequence finished
|
||||
if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
|
||||
const auto* src = inPtr + lastStart * innerSizeBytes;
|
||||
|
|
|
|||
|
|
@ -7,15 +7,15 @@ namespace caffe2 {
|
|||
|
||||
namespace {
|
||||
__global__ void BooleanMaskCopyKernel(
|
||||
const TIndex numOfOutput,
|
||||
const TIndex numBytes,
|
||||
const TIndex* indices,
|
||||
const int64_t numOfOutput,
|
||||
const int64_t numBytes,
|
||||
const int64_t* indices,
|
||||
const uint8_t* src,
|
||||
uint8_t* dest) {
|
||||
for (TIndex i = blockIdx.x; i < numOfOutput; i += gridDim.x) {
|
||||
for (int64_t i = blockIdx.x; i < numOfOutput; i += gridDim.x) {
|
||||
const auto srcBase = indices[i] * numBytes;
|
||||
const auto destBase = i * numBytes;
|
||||
for (TIndex j = threadIdx.x; j < numBytes; j += blockDim.x) {
|
||||
for (int64_t j = threadIdx.x; j < numBytes; j += blockDim.x) {
|
||||
dest[destBase + j] = src[srcBase + j];
|
||||
}
|
||||
}
|
||||
|
|
@ -40,7 +40,7 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
|
|||
const auto* maskData = mask.data<bool>();
|
||||
const auto outerSize = mask.dims()[0];
|
||||
indices_.Resize(outerSize);
|
||||
auto* indicesData = indices_.mutable_data<TIndex>();
|
||||
auto* indicesData = indices_.mutable_data<int64_t>();
|
||||
|
||||
size_t numBytes = 0;
|
||||
cub::CountingInputIterator<int> itr(0);
|
||||
|
|
@ -50,16 +50,16 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
|
|||
itr,
|
||||
maskData,
|
||||
indicesData,
|
||||
static_cast<TIndex*>(nullptr),
|
||||
static_cast<int64_t*>(nullptr),
|
||||
outerSize,
|
||||
context_.cuda_stream());
|
||||
|
||||
auto numTIndex =
|
||||
static_cast<TIndex>((numBytes + sizeof(TIndex) - 1) / sizeof(TIndex));
|
||||
// allocate one more TIndex at the end of scratch for storing numOfOutput
|
||||
scratch_.Resize(numTIndex + 1);
|
||||
auto* scratchData = scratch_.mutable_data<TIndex>();
|
||||
auto* numOfOutputData = scratchData + numTIndex;
|
||||
auto numint64_t =
|
||||
static_cast<int64_t>((numBytes + sizeof(int64_t) - 1) / sizeof(int64_t));
|
||||
// allocate one more int64_t at the end of scratch for storing numOfOutput
|
||||
scratch_.Resize(numint64_t + 1);
|
||||
auto* scratchData = scratch_.mutable_data<int64_t>();
|
||||
auto* numOfOutputData = scratchData + numint64_t;
|
||||
|
||||
cub::DeviceSelect::Flagged(
|
||||
static_cast<void*>(scratchData),
|
||||
|
|
@ -72,11 +72,11 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
|
|||
context_.cuda_stream());
|
||||
|
||||
// Copy numOfOutput from gpu to cpu
|
||||
TIndex numOfOutput;
|
||||
int64_t numOfOutput;
|
||||
context_.CopyToCPU(1, numOfOutputData, &numOfOutput);
|
||||
|
||||
indices_.Resize(numOfOutput);
|
||||
std::vector<TIndex> dims = src.dims();
|
||||
std::vector<int64_t> dims = src.dims();
|
||||
dims[0] = numOfOutput;
|
||||
dest->Resize(dims);
|
||||
auto* destData = (uint8_t*)dest->raw_mutable_data(src.meta());
|
||||
|
|
@ -84,12 +84,12 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
|
|||
if (OutputSize() == 2) {
|
||||
auto* indicesOut = Output(1);
|
||||
indicesOut->Resize(numOfOutput);
|
||||
indicesOut->template mutable_data<TIndex>();
|
||||
indicesOut->template mutable_data<int64_t>();
|
||||
}
|
||||
|
||||
if (numOfOutput > 0) {
|
||||
BooleanMaskCopyKernel<<<
|
||||
min(numOfOutput, static_cast<TIndex>(CAFFE_MAXIMUM_NUM_BLOCKS)),
|
||||
min(numOfOutput, static_cast<int64_t>(CAFFE_MAXIMUM_NUM_BLOCKS)),
|
||||
CAFFE_CUDA_NUM_THREADS,
|
||||
0,
|
||||
context_.cuda_stream()>>>(
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@ static void AddScalarInput(
|
|||
Blob* blob = ws->CreateBlob(name);
|
||||
auto* tensor = blob->GetMutableTensor(CPU);
|
||||
if (!isEmpty) {
|
||||
tensor->Resize(vector<TIndex>{1});
|
||||
tensor->Resize(vector<int64_t>{1});
|
||||
*(tensor->template mutable_data<DataT>()) = value;
|
||||
} else {
|
||||
tensor->Resize(vector<TIndex>{0});
|
||||
tensor->Resize(vector<int64_t>{0});
|
||||
tensor->template mutable_data<DataT>();
|
||||
}
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ bool CastOp<CPUContext>::DoRunWithType() {
|
|||
const auto* data = input.template data<SrcType>();
|
||||
auto* out = output->template mutable_data<DstType>();
|
||||
auto N = input.size();
|
||||
for (TIndex i = 0; i < N; ++i) {
|
||||
for (int64_t i = 0; i < N; ++i) {
|
||||
out[i] = static_cast<DstType>(data[i]);
|
||||
}
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ class CastOp : public Operator<Context> {
|
|||
const auto* data = input.template data<SrcType>();
|
||||
auto* out = output->template mutable_data<DstType>();
|
||||
auto N = input.size();
|
||||
for (TIndex i = 0; i < N; ++i) {
|
||||
for (int64_t i = 0; i < N; ++i) {
|
||||
out[i] = static_cast<DstType>(data[i]);
|
||||
}
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ bool SplitOp<Context>::RunOnDevice() {
|
|||
input_channels,
|
||||
"Sum of split dimensions do not match: should be ",
|
||||
input_channels);
|
||||
vector<TIndex> output_dims(input.dims());
|
||||
vector<int64_t> output_dims(input.dims());
|
||||
int before = 1, after = 1;
|
||||
for (int i = 0; i < canonical_axis; ++i) {
|
||||
before *= input.dim32(i);
|
||||
|
|
@ -215,7 +215,7 @@ bool SplitByLengthsOp<Context>::RunOnDevice() {
|
|||
input_channels,
|
||||
"Sum of split dimensions do not match: should be ",
|
||||
input_channels);
|
||||
vector<TIndex> output_dims(input.dims());
|
||||
vector<int64_t> output_dims(input.dims());
|
||||
int before = input.size_to_dim(canonical_axis);
|
||||
int after = input.size_from_dim(canonical_axis + 1);
|
||||
size_t input_offset = 0;
|
||||
|
|
@ -245,7 +245,7 @@ template <class Context>
|
|||
bool ConcatOp<Context>::RunOnDevice() {
|
||||
auto* output = Output(0);
|
||||
Tensor* split = this->template Output<Tensor>(1, CPU);
|
||||
split->Resize(vector<TIndex>(1, InputSize()));
|
||||
split->Resize(vector<int64_t>(1, InputSize()));
|
||||
int* axis_data = split->template mutable_data<int>();
|
||||
auto& input_zero = Input(0);
|
||||
int adj_size = input_zero.ndim() + (add_axis_ ? 1 : 0);
|
||||
|
|
@ -263,7 +263,7 @@ bool ConcatOp<Context>::RunOnDevice() {
|
|||
}
|
||||
|
||||
int before = 1, after = 1;
|
||||
vector<TIndex> output_dims(input_zero.dims());
|
||||
vector<int64_t> output_dims(input_zero.dims());
|
||||
for (int i = 0; i < input_zero.ndim(); ++i) {
|
||||
if (i == canonical_axis && !add_axis_) {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ bool ConditionalOp<CPUContext>::RunOnDevice() {
|
|||
// perform conditional op along first dimension
|
||||
const auto* ptrT = (char*)dataT.raw_data();
|
||||
const auto* ptrF = (char*)dataF.raw_data();
|
||||
for (TIndex i = 0; i < condition.size(); i++) {
|
||||
for (int64_t i = 0; i < condition.size(); i++) {
|
||||
auto* dst = outPtr + i * innerSizeBytes;
|
||||
if (condPtr[i]) {
|
||||
context_.CopyItemsSameDevice(
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ class AlgorithmsCache {
|
|||
// combination of tensor dimensions & compute data type.
|
||||
//
|
||||
TAlgorithm getAlgorithm(
|
||||
const std::vector<TIndex>& tensorDimensions1,
|
||||
const std::vector<TIndex>& tensorDimensions2,
|
||||
const std::vector<int64_t>& tensorDimensions1,
|
||||
const std::vector<int64_t>& tensorDimensions2,
|
||||
int algorithmFlags, // Differentiate between algorithms with different
|
||||
// parameters in a generic way
|
||||
std::function<TAlgorithm()> generatingFunc);
|
||||
|
|
@ -28,14 +28,14 @@ class AlgorithmsCache {
|
|||
|
||||
template <typename TAlgorithm>
|
||||
TAlgorithm AlgorithmsCache<TAlgorithm>::getAlgorithm(
|
||||
const std::vector<TIndex>& tensorDimensions1,
|
||||
const std::vector<TIndex>& tensorDimensions2,
|
||||
const std::vector<int64_t>& tensorDimensions1,
|
||||
const std::vector<int64_t>& tensorDimensions2,
|
||||
int algorithmFlags,
|
||||
std::function<TAlgorithm()> generatingFunc) {
|
||||
int64_t seed = 0;
|
||||
// Hash all of the inputs, which we wiill then use to try and look up
|
||||
// a previously discovered algorithm, or fall back to generating a new one.
|
||||
std::hash<TIndex> hashFn;
|
||||
std::hash<int64_t> hashFn;
|
||||
for (const auto num : tensorDimensions1) {
|
||||
// Copied from boost::hash_combine.
|
||||
// Adding 1 to differentiate between first and second vector.
|
||||
|
|
|
|||
|
|
@ -12,11 +12,11 @@ namespace caffe2 {
TEST(AlgorithmsCacheTest, CachesCorrectly) {
AlgorithmsCache<int> cache;
int result = cache.getAlgorithm(
std::vector<TIndex>(1), std::vector<TIndex>(1), 0, []() { return 5; });
std::vector<int64_t>(1), std::vector<int64_t>(1), 0, []() { return 5; });
EXPECT_EQ(result, 5);

int res2 = cache.getAlgorithm(
std::vector<TIndex>(1), std::vector<TIndex>(1), 0, []() { return 10; });
std::vector<int64_t>(1), std::vector<int64_t>(1), 0, []() { return 10; });

EXPECT_EQ(res2, 5);
}

@ -24,11 +24,11 @@ TEST(AlgorithmsCacheTest, CachesCorrectly) {
TEST(AlgorithmsCacheTest, KeysDifferIfOneVectorIsEmpty) {
AlgorithmsCache<int> cache;
int result = cache.getAlgorithm(
std::vector<TIndex>(1, 10), std::vector<TIndex>(), 0, []() { return 5; });
std::vector<int64_t>(1, 10), std::vector<int64_t>(), 0, []() { return 5; });
EXPECT_EQ(result, 5);

int res2 = cache.getAlgorithm(
std::vector<TIndex>(), std::vector<TIndex>(1, 10), 0, []() {
std::vector<int64_t>(), std::vector<int64_t>(1, 10), 0, []() {
return 10;
});

@ -38,20 +38,20 @@ TEST(AlgorithmsCacheTest, KeysDifferIfOneVectorIsEmpty) {
TEST(AlgorithmsCacheTest, KeysDifferIfFlagsAreDifferent) {
AlgorithmsCache<int> cache;
int result = cache.getAlgorithm(
std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 123, []() {
std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 123, []() {
return 5;
});
EXPECT_EQ(result, 5);

int res2 = cache.getAlgorithm(
std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 456, []() {
std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 456, []() {
return 10;
});

EXPECT_EQ(res2, 10);

int res3 = cache.getAlgorithm(
std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 456, []() {
std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 456, []() {
return 15;
});

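For context on the AlgorithmsCache hunks above: the cache keys an algorithm on a single int64_t seed hashed from both dimension vectors plus the flags, which is why the tests expect the cached value back whenever dims and flags match and a fresh value otherwise. A condensed sketch of that keying, assuming an unordered_map store and the boost::hash_combine recipe the comment refers to (class and member names here are illustrative):

#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

template <typename TAlgorithm>
class DimsKeyedCache {
 public:
  TAlgorithm getAlgorithm(
      const std::vector<int64_t>& dims1,
      const std::vector<int64_t>& dims2,
      int flags,
      std::function<TAlgorithm()> generatingFunc) {
    int64_t seed = 0;
    std::hash<int64_t> hashFn;
    // boost::hash_combine; the +1 / +2 / +3 offsets keep the two vectors and
    // the flags from hashing identically when they hold the same numbers.
    for (const auto num : dims1) {
      seed ^= hashFn(num) + 0x9e3779b9 + (seed << 6) + (seed >> 2) + 1;
    }
    for (const auto num : dims2) {
      seed ^= hashFn(num) + 0x9e3779b9 + (seed << 6) + (seed >> 2) + 2;
    }
    seed ^= hashFn(flags) + 0x9e3779b9 + (seed << 6) + (seed >> 2) + 3;

    auto it = cache_.find(seed);
    if (it != cache_.end()) {
      return it->second; // previously discovered algorithm
    }
    TAlgorithm value = generatingFunc(); // fall back to generating a new one
    cache_[seed] = value;
    return value;
  }

 private:
  std::unordered_map<int64_t, TAlgorithm> cache_;
};
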
@ -411,8 +411,8 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
}
}

vector<TIndex> cudnn_input_dims_;
vector<TIndex> cudnn_filter_dims_;
vector<int64_t> cudnn_input_dims_;
vector<int64_t> cudnn_filter_dims_;

CuDNNWrapper cudnn_wrapper_;
cudnnTensorDescriptor_t bottom_desc_;

@ -42,10 +42,10 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
CAFFE_ENFORCE(filter.dim32(2) == kernel_h());
CAFFE_ENFORCE(filter.dim32(3) == kernel_w());
ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
Eigen::array<TIndex, 4> kernel_shuffles
{ {TIndex(2), TIndex(3), TIndex(1), TIndex(0)} };
Eigen::array<TIndex, 4> input_shuffles
{ {TIndex(0), TIndex(2), TIndex(3), TIndex(1)} };
Eigen::array<int64_t, 4> kernel_shuffles
{ {int64_t(2), int64_t(3), int64_t(1), int64_t(0)} };
Eigen::array<int64_t, 4> input_shuffles
{ {int64_t(0), int64_t(2), int64_t(3), int64_t(1)} };

Eigen::Tensor<T, 4, Eigen::RowMajor> filter_tensor =
Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(

@ -109,14 +109,14 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
// It seems that the bias broadcast is still slower so let's do the
// following for now.
EigenArrayMap<T> Y_arr(
Y_tensor.data(), static_cast<TIndex>(M), Y->size() / M);
Y_tensor.data(), static_cast<int64_t>(M), Y->size() / M);
ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
Y_arr = Y_arr.colwise() + bias_arr;
}

// Do a last transpose.
Eigen::array<TIndex, 4> output_shuffles
{ {TIndex(0), TIndex(3), TIndex(1), TIndex(2) } };
Eigen::array<int64_t, 4> output_shuffles
{ {int64_t(0), int64_t(3), int64_t(1), int64_t(2) } };

Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
Y->template mutable_data<T>(), N, M, Y->dim32(2), Y->dim32(3)) =

@ -204,7 +204,7 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNHWC() {
// It seems that the bias broadcast is still slower so let's do the
// following for now.
EigenArrayMap<T> Y_arr(
Y->template mutable_data<T>(), static_cast<TIndex>(M), Y->size() / M);
Y->template mutable_data<T>(), static_cast<int64_t>(M), Y->size() / M);
ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
Y_arr = Y_arr.colwise() + bias_arr;
}

@ -240,7 +240,7 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNHWC() {
}
auto f = [&](Tensor* col_buffer) {
col_buffer->Resize(
vector<TIndex>{Y->dim32(1), Y->dim32(2), kernel_h(), kernel_w(), C});
vector<int64_t>{Y->dim32(1), Y->dim32(2), kernel_h(), kernel_w(), C});
T* col_buffer_data = col_buffer->template mutable_data<T>();
// Im2Col, followed by gemm.
for (int image_id = 0; image_id < N; ++image_id) {

@ -504,7 +504,7 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
dbias->Resize(M);
if (bias_multiplier_.size() != output_image_size) {
// If the helper bias multiplier is not M, reshape and fill it with one.
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),

@ -689,7 +689,7 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
if (bias_multiplier_.size() != output_image_size) {
// If the helper bias multiplier is not M, reshape and fill it with one.
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),

@ -246,7 +246,7 @@ class ConvPoolOpBase : public Operator<Context> {
// Helper function that is also called from OperatorSchema. Modified
// kernel parameters and output output_dims and channel_first.
static inline void InferOutputSize(
vector<TIndex> input_dims,
vector<int64_t> input_dims,
int /*output_channel*/,
StorageOrder order,
bool global_pooling,

@ -259,7 +259,7 @@ class ConvPoolOpBase : public Operator<Context> {
vector<int>& pads,
bool& channel_first) {
channel_first = false; // initialized to suppress compiler warning.
vector<TIndex> dims;
vector<int64_t> dims;
switch (order) {
case StorageOrder::NHWC:
channel_first = false;

@ -358,7 +358,7 @@ class ConvPoolOpBase : public Operator<Context> {
if (bias_multiplier_->size() != size) {
// If the helper bias multiplier is not image size, reshape and fill it
// with one.
bias_multiplier_->Resize(std::vector<TIndex>{size});
bias_multiplier_->Resize(std::vector<int64_t>{size});
math::Set<T, Context>(
size,
static_cast<T>(1),

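Several conv hunks above resize a bias_multiplier buffer to output_image_size and fill it with ones. That buffer exists so bias broadcasting (and, in the gradient ops, bias reduction) can be phrased as a GEMM against a ones vector instead of an explicit loop. A small sketch of the equivalent arithmetic without the math library, assuming NCHW layout and free-standing names:

#include <cstdint>
#include <vector>

// Adds bias[m] to every output pixel of channel m, which is what the rank-1
// update Y += bias * bias_multiplier^T performed via math::Gemm amounts to.
void AddBiasNCHW(
    const std::vector<float>& bias, // M output channels
    int64_t output_image_size, // H_out * W_out
    std::vector<float>& Y) { // M * output_image_size values, CHW layout
  const std::vector<float> bias_multiplier(output_image_size, 1.0f);
  for (size_t m = 0; m < bias.size(); ++m) {
    for (int64_t p = 0; p < output_image_size; ++p) {
      Y[m * output_image_size + p] += bias[m] * bias_multiplier[p];
    }
  }
}
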
@ -64,8 +64,8 @@ class CudnnConvTransposeOpBase : public ConvTransposeUnpoolBase<CUDAContext> {
}

protected:
vector<TIndex> cudnn_input_dims_;
vector<TIndex> cudnn_filter_dims_;
vector<int64_t> cudnn_input_dims_;
vector<int64_t> cudnn_filter_dims_;

CuDNNWrapper cudnn_wrapper_;
cudnnTensorDescriptor_t bottom_desc_;

@ -45,7 +45,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
bias.dim32(0) == C,
"bias dimension must be equal to output channel number");
if (bias_multiplier_.size() != output_image_size) {
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
T* bm_data = bias_multiplier_.template mutable_data<T>();
math::Set<T, Context>(
output_image_size,

@ -61,7 +61,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {

auto f = [&](Tensor* col_buffer) {
col_buffer->Resize(
vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W});
T* col_buffer_data = col_buffer->template mutable_data<T>();
for (auto image_id = 0; image_id < N; ++image_id) {
// Weight term

@ -167,7 +167,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
bias.dim32(0) == C,
"bias dimension must be equal to output channel number");
if (bias_multiplier_.size() != output_image_size) {
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
T* bm_data = bias_multiplier_.template mutable_data<T>();
math::Set<T, Context>(
output_image_size,

@ -182,7 +182,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {

auto f = [&](Tensor* /*col_buffer*/) {
col_buffer_.Resize(
vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C});
T* col_buffer_data = col_buffer_.template mutable_data<T>();
for (auto image_id = 0; image_id < N; ++image_id) {
// Weight term

@ -270,7 +270,7 @@ bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
const int output_image_size = dY.dim32(2) * dY.dim32(3);
// The col buffer is stored in CHW order as well
col_buffer_.Resize(
vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W});
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C);

@ -422,7 +422,7 @@ bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
const int output_image_size = dY.dim32(1) * dY.dim32(2);
// The col buffer is stored in HWC order as well
col_buffer_.Resize(
vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C});
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C);

@ -10,7 +10,7 @@

namespace caffe2 {

void AddConstInput(const vector<TIndex>& shape,
void AddConstInput(const vector<int64_t>& shape,
const float value,
const string& name,
Workspace* ws) {

@ -23,7 +23,7 @@ void AddConstInput(const vector<TIndex>& shape,
tensor->size(), value, tensor->template mutable_data<float>(), &context);
}

void AddNoiseInput(const vector<TIndex>& shape,
void AddNoiseInput(const vector<int64_t>& shape,
const string& name,
Workspace* ws) {
DeviceOption option;

@ -81,9 +81,9 @@ void compare(int N, int inputC, int H, int W,
def1.add_arg()->CopyFrom(MakeArgument("adj_h", adjH));
def1.add_arg()->CopyFrom(MakeArgument("adj_w", adjW));

AddNoiseInput(vector<TIndex>{N, inputC, H, W}, "X", &ws);
AddNoiseInput(vector<TIndex>{inputC, outputC, kernelH, kernelW}, "W", &ws);
AddNoiseInput(vector<TIndex>{outputC}, "B", &ws);
AddNoiseInput(vector<int64_t>{N, inputC, H, W}, "X", &ws);
AddNoiseInput(vector<int64_t>{inputC, outputC, kernelH, kernelW}, "W", &ws);
AddNoiseInput(vector<int64_t>{outputC}, "B", &ws);

unique_ptr<OperatorBase> op1(CreateOperator(def1, &ws));
EXPECT_NE(nullptr, op1.get());

@ -80,9 +80,9 @@ bool SigmoidCrossEntropyWithLogitsOp<float, CPUContext>::RunOnDevice() {

auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();

@ -162,9 +162,9 @@ bool WeightedSigmoidCrossEntropyWithLogitsOp<float, CPUContext>::RunOnDevice() {

auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();

@ -260,11 +260,11 @@ bool MakeTwoClassOp<float, CPUContext>::RunOnDevice() {
auto* Y = Output(0);
auto shape = X.dims();
shape.push_back(2);
TIndex N = X.size();
int64_t N = X.size();
Y->Resize(shape);
const auto* Xdata = X.data<float>();
auto* Ydata = Y->template mutable_data<float>();
for (TIndex i = 0; i < N; ++i) {
for (int64_t i = 0; i < N; ++i) {
DCHECK_GE(Xdata[i], 0.0);
DCHECK_LE(Xdata[i], 1.0);
Ydata[i * 2] = 1.0 - Xdata[i];

@ -284,9 +284,9 @@ bool MakeTwoClassGradientOp<float, CPUContext>::RunOnDevice() {
dX->Resize(shape);
const float* dYdata = dY.data<float>();
float* dXdata = dX->template mutable_data<float>();
TIndex N = dX->size();
int64_t N = dX->size();
// use eigen?
for (TIndex i = 0; i < N; ++i) {
for (int64_t i = 0; i < N; ++i) {
dXdata[i] = dYdata[i * 2 + 1] - dYdata[i * 2];
}
return true;

@ -308,7 +308,7 @@ bool CrossEntropyOp<float, CPUContext>::RunOnDevice() {
CAFFE_ENFORCE(
(label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == D));
CAFFE_ENFORCE_EQ(label.dim32(0), N);
Y->Resize(vector<TIndex>{N});
Y->Resize(vector<int64_t>{N});
const float* Xdata = X.data<float>();
const float* labelData = label.data<float>();
auto* Ydata = Y->template mutable_data<float>();

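The MakeTwoClass hunks above only retype the loop index; the operator itself expands a probability p into the two-class pair (1 - p, p), and its gradient collapses back to the difference of the two incoming gradients. A standalone sketch of that forward/backward pair (function names are assumptions):

#include <cstdint>

void MakeTwoClassForward(const float* X, int64_t N, float* Y) {
  for (int64_t i = 0; i < N; ++i) {
    Y[i * 2] = 1.0f - X[i]; // probability of class 0
    Y[i * 2 + 1] = X[i]; // probability of class 1
  }
}

void MakeTwoClassBackward(const float* dY, int64_t N, float* dX) {
  for (int64_t i = 0; i < N; ++i) {
    // dL/dX = -dL/dY0 + dL/dY1, since Y0 = 1 - X and Y1 = X.
    dX[i] = dY[i * 2 + 1] - dY[i * 2];
  }
}
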
@ -42,7 +42,7 @@ bool LabelCrossEntropyOp<float, CUDAContext>::RunOnDevice() {
CAFFE_ENFORCE(
(label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1));
CAFFE_ENFORCE_EQ(label.dim32(0), N);
Y->Resize(vector<TIndex>(size_t(1), N));
Y->Resize(vector<int64_t>(size_t(1), N));
LabelCrossEntropyKernel<<<
CAFFE_GET_BLOCKS(N),
CAFFE_CUDA_NUM_THREADS,

@ -250,9 +250,9 @@ bool SigmoidCrossEntropyWithLogitsOp<float, CUDAContext>::RunOnDevice() {

auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();

@ -372,9 +372,9 @@ bool WeightedSigmoidCrossEntropyWithLogitsOp<float, CUDAContext>::

auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();

@ -32,7 +32,7 @@ bool CTCBeamSearchDecoderOp<CPUContext>::RunOnDevice() {
(InputSize() == 2) ? Input(SEQ_LEN).data<int>() : nullptr;

vector<int32_t> values_cache;
output_len->Resize(vector<TIndex>{batch_size});
output_len->Resize(vector<int64_t>{batch_size});
int* output_len_data = output_len->mutable_data<int>();

for (int32_t i = 0; i < batch_size; ++i) {

@ -121,7 +121,7 @@ bool CTCBeamSearchDecoderOp<CPUContext>::RunOnDevice() {
}

int32_t cache_size = values_cache.size();
values->Resize(vector<TIndex>{cache_size});
values->Resize(vector<int64_t>{cache_size});
int* values_data = values->mutable_data<int>();
for (int i = 0; i < values_cache.size(); ++i) {
values_data[i] = values_cache.at(i);

@ -32,7 +32,7 @@ bool CTCGreedyDecoderOp<CPUContext>::RunOnDevice() {
(InputSize() == 2) ? Input(SEQ_LEN).data<int>() : nullptr;

vector<int> values_cach;
output_len->Resize(vector<TIndex>{batch_size});
output_len->Resize(vector<int64_t>{batch_size});
int* output_len_data = output_len->template mutable_data<int>();

for (int32_t i = 0; i < batch_size; ++i) {

@ -54,7 +54,7 @@ bool CTCGreedyDecoderOp<CPUContext>::RunOnDevice() {
}

int32_t values_cach_size = values_cach.size();
values->Resize(vector<TIndex>{values_cach_size});
values->Resize(vector<int64_t>{values_cach_size});
int* values_data = values->mutable_data<int>();
for (int i = 0; i < values_cach.size(); ++i) {
values_data[i] = values_cach.at(i);

@ -155,7 +155,7 @@ void TreeWalker::advance() {
cursor_.it.advance(lengths_, cursor_.offsets, sizes_, limits_, 1);
}

std::vector<TIndex> TreeWalker::fieldDim(int fieldId) const {
std::vector<int64_t> TreeWalker::fieldDim(int fieldId) const {
auto tensorDim = input(fieldId).dims();
tensorDim[0] = sizes_[lengthIdx(fieldId)];
return tensorDim;

@ -355,7 +355,7 @@ class UnPackRecordsOp : public Operator<CPUContext> {
auto numTensors = OutputSize();

// Precompute the output sizes to avoid resizing
std::vector<std::vector<TIndex>> outputDims(numTensors);
std::vector<std::vector<int64_t>> outputDims(numTensors);
std::vector<const TypeMeta*> metas(numTensors);

CAFFE_ENFORCE(

@ -414,7 +414,7 @@ class UnPackRecordsOp : public Operator<CPUContext> {

private:
void getShapeAndMetaFromInput(
std::vector<std::vector<TIndex>>& outputDims,
std::vector<std::vector<int64_t>>& outputDims,
std::vector<const TypeMeta*>& metas) {
const auto* inputs = Input(0).template data<SharedTensorVectorPtr>();

@ -434,7 +434,7 @@ class UnPackRecordsOp : public Operator<CPUContext> {
}

void getShapeAndMetaFromPrototypeBlobs(
std::vector<std::vector<TIndex>>& outputDims,
std::vector<std::vector<int64_t>>& outputDims,
std::vector<const TypeMeta*>& metas) {
const auto numTensors = fields_.size();
CAFFE_ENFORCE_EQ(numTensors, InputSize() - 1);

@ -501,7 +501,7 @@ class ReadNextBatchOp : public Operator<CPUContext> {
}
}
// gather data
std::vector<TIndex> outDim;
std::vector<int64_t> outDim;
for (int i = 0; i < cursor->it.fields().size(); ++i) {
auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1;
auto size = sizes[lengthIdx];

@ -676,7 +676,7 @@ class ReadRandomBatchOp : public Operator<CPUContext> {
auto idxvec = idxblob.template data<int64_t>();
auto& offsetdim = offsetsmat.dims();
// gather data
std::vector<TIndex> outDim;
std::vector<int64_t> outDim;
int64_t idx;
{
std::lock_guard<std::mutex> lock(cursor->mutex_);

@ -883,7 +883,7 @@ class ConcatTensorVectorOp final : public Operator<Context> {
auto* tensor = Output(TENSOR);
CAFFE_ENFORCE(!tensorVector->empty());

vector<TIndex> outputDims(tensorVector->at(0).dims());
vector<int64_t> outputDims(tensorVector->at(0).dims());
CAFFE_ENFORCE(outputDims.size() > 0);
for (int i = 1; i < tensorVector->size(); i++) {
// the tensor shapes are the same except for the first dimension

@ -895,7 +895,7 @@ class ConcatTensorVectorOp final : public Operator<Context> {
}

tensor->Resize(outputDims);
TIndex offset = 0;
int64_t offset = 0;
auto* dst = (char*)tensor->raw_mutable_data(tensorVector->at(0).meta());

for (const auto& t : *tensorVector) {

@ -123,7 +123,7 @@ class TreeWalker {
return prevOffsets_[lengthIdx(fieldId)];
}

std::vector<TIndex> fieldDim(int fieldId) const;
std::vector<int64_t> fieldDim(int fieldId) const;

void* fieldPtr(int fieldId) const;

@ -134,12 +134,12 @@ class TreeWalker {
Field(TreeWalker& walker, int fieldId)
: walker_(walker), fieldId_(fieldId) {}

inline std::vector<TIndex> dim() const {
inline std::vector<int64_t> dim() const {
return walker_.fieldDim(fieldId_);
}

inline TIndex size() const {
TIndex size = 1;
inline int64_t size() const {
int64_t size = 1;
for (const auto d : dim()) {
size *= d;
}

@ -67,8 +67,8 @@

namespace caffe2 {

typedef TIndex index_t;
typedef std::vector<TIndex> TShape;
typedef int64_t index_t;
typedef std::vector<int64_t> TShape;

template <typename DType>
__device__ DType deformable_im2col_bilinear(

@ -304,8 +304,8 @@ template <typename DType, typename Context>
void DeformConvOpBase<DType, Context>::DeformableIm2col(
const DType* data_im,
const DType* data_offset,
const std::vector<TIndex>& im_shape,
const std::vector<TIndex>& col_shape,
const std::vector<int64_t>& im_shape,
const std::vector<int64_t>& col_shape,
DType* data_col) {
CHECK_LT(2, CAFFE_CUDA_NUM_THREADS);
CAFFE_ENFORCE_EQ(pad_t(), pad_b());

@ -430,8 +430,8 @@ template <typename DType, typename Context>
void DeformConvOpBase<DType, Context>::DeformableCol2im(
const DType* data_col,
const DType* data_offset,
const std::vector<TIndex>& im_shape,
const std::vector<TIndex>& col_shape,
const std::vector<int64_t>& im_shape,
const std::vector<int64_t>& col_shape,
DType* grad_im) {
CAFFE_ENFORCE_EQ(pad_t(), pad_b());
CAFFE_ENFORCE_EQ(pad_l(), pad_r());

@ -577,8 +577,8 @@ void DeformConvOpBase<DType, Context>::DeformableCol2imCoord(
const DType* data_col,
const DType* data_im,
const DType* data_offset,
const std::vector<TIndex>& im_shape,
const std::vector<TIndex>& col_shape,
const std::vector<int64_t>& im_shape,
const std::vector<int64_t>& col_shape,
DType* grad_offset) {
CAFFE_ENFORCE_EQ(pad_t(), pad_b());
CAFFE_ENFORCE_EQ(pad_l(), pad_r());

Some files were not shown because too many files have changed in this diff.