Remove many caffe2::TIndex and replace them with int64_t (#11943)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11943

See title: replace most remaining uses of caffe2::TIndex with int64_t. TIndex is a caffe2 alias for int64_t, so this is a mechanical rename with no intended behavioral change.
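
A minimal before/after sketch of what the rename means for calling code. This is a hypothetical snippet, not taken from the diff; it assumes a caffe2 checkout of this era with caffe2/core/tensor.h on the include path and CPU as the usual caffe2 DeviceType constant.

#include <cstdint>
#include <vector>

#include "caffe2/core/tensor.h"

int main() {
  // Before this commit, shape vectors were commonly spelled with the alias:
  //   caffe2::Tensor t(std::vector<caffe2::TIndex>{2, 3, 4}, caffe2::CPU);
  // After this commit, the underlying type is written out directly:
  caffe2::Tensor t(std::vector<int64_t>{2, 3, 4}, caffe2::CPU);
  // Accessors such as size() and dim() now return int64_t rather than TIndex.
  return t.size() == 2 * 3 * 4 ? 0 : 1;
}

The rest of the diff is the same substitution applied to shape vectors, std::accumulate calls, loop counters, operator arguments, and data<T>()/mutable_data<T>() template parameters.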

Reviewed By: ezyang

Differential Revision: D9992645

fbshipit-source-id: e8f80d6ea762971513e5e8072975ceea53e1f11a
Christian Puhrsch 2018-09-22 18:07:38 -07:00 committed by Facebook Github Bot
parent 5d0f1c3c8f
commit a6630e25af
248 changed files with 1446 additions and 1454 deletions

View File

@ -139,7 +139,7 @@ BENCHMARK(BM_cudaStreamWaitEventThenStreamSynchronize);
static void BM_CudaPointerAffinity(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
Tensor tensor(vector<TIndex>{1, 2, 3, 4}, CUDA);
Tensor tensor(vector<int64_t>{1, 2, 3, 4}, CUDA);
float* ptr = tensor.mutable_data<float>();
while (state.KeepRunning()) {
volatile int id = GetGPUIDForPointer(ptr);

View File

@ -144,7 +144,7 @@ private:
}
template <typename T>
void assignToValue(Tensor* dst, T v) {
dst->Resize(std::vector<TIndex>());
dst->Resize(std::vector<int64_t>());
math::Set(1, v, dst->template mutable_data<T>(), &context_);
}
int findImplementation(const OperatorDef& operator_def) {

View File

@ -75,7 +75,7 @@ class AllgatherOp final : public Operator<Context> {
auto comm_size =
OperatorBase::Input<std::shared_ptr<::gloo::Context>>(0)->size;
const auto dims =
std::vector<TIndex>(1, (InputSize() - 1) * Input(1).size() * comm_size);
std::vector<int64_t>(1, (InputSize() - 1) * Input(1).size() * comm_size);
Output(0)->Resize(dims);
// Store which inputs/outputs this instance initialized with

View File

@ -269,7 +269,7 @@ void NCCL<T>::AllGather(const NCCLExecution& ex) {
ex,
[n](const NCCLElement& ctx) {
CAFFE_ENFORCE_NE(ctx.src, ctx.dst);
std::vector<TIndex> dims;
std::vector<int64_t> dims;
dims.reserve(ctx.src->ndim() + 1);
dims.push_back(n);
for (auto d : ctx.src->dims()) {
@ -307,7 +307,7 @@ void NCCL<T>::ReduceScatter(const NCCLExecution& ex) {
[](const NCCLElement& ctx) {
CAFFE_ENFORCE_NE(ctx.src, ctx.dst);
const auto& srcDims = ctx.src->dims();
std::vector<TIndex> dstDims(srcDims.begin() + 1, srcDims.end());
std::vector<int64_t> dstDims(srcDims.begin() + 1, srcDims.end());
ctx.dst->Resize(dstDims);
ctx.dst->template mutable_data<T>();
},

View File

@ -15,7 +15,7 @@ namespace {
// Otherwise, return the product of CHW dimensions
int64_t CheckDims(
const nvinfer1::Dims& nv_dims,
const std::vector<TIndex>& c2_dims) {
const std::vector<int64_t>& c2_dims) {
if (nv_dims.nbDims + 1 != c2_dims.size()) {
CAFFE_THROW(
"Mismatched dimensions between TRT input (",
@ -115,7 +115,7 @@ TensorRTOp::TensorRTOp(const OperatorDef& operator_def, Workspace* ws)
const std::string key = MakeString("output_size_hint_", output_idx);
auto output_size_hint = OperatorBase::GetRepeatedArgument<int>(key);
if (!output_size_hint.empty()) {
std::vector<TIndex> dims;
std::vector<int64_t> dims;
for (const auto v : output_size_hint) {
dims.push_back(v);
}
@ -130,17 +130,17 @@ TensorRTOp::TensorRTOp(const OperatorDef& operator_def, Workspace* ws)
void TensorRTOp::MaybeAdjustOutputShape(
int output_idx,
std::vector<TIndex>* dims) {
std::vector<int64_t>* dims) {
const auto it = output_size_hints_.find(output_idx);
if (it != output_size_hints_.end()) {
const auto& dims_hint = it->second;
auto total_trt = std::accumulate(
dims->begin(), dims->end(), (TIndex)(1), std::multiplies<TIndex>());
dims->begin(), dims->end(), (int64_t)(1), std::multiplies<int64_t>());
auto total_c2 = std::accumulate(
dims_hint.begin(),
dims_hint.end(),
(TIndex)(1),
std::multiplies<TIndex>());
(int64_t)(1),
std::multiplies<int64_t>());
CAFFE_ENFORCE_EQ(
total_trt,
total_c2,
@ -204,7 +204,7 @@ bool TensorRTOp::RunOnDevice() {
} else {
// output, we need to allocate the output tensor at first batch run
auto* output_tensor = Output(output_idx);
std::vector<TIndex> tensor_dims;
std::vector<int64_t> tensor_dims;
tensor_dims.push_back(N);
int64_t chw = 1;
for (int i = 0; i < dims.nbDims; ++i) {

View File

@ -17,13 +17,13 @@ class TensorRTOp final : public Operator<CUDAContext> {
virtual ~TensorRTOp() noexcept {}
private:
void MaybeAdjustOutputShape(int output_idx, std::vector<TIndex>* dims);
void MaybeAdjustOutputShape(int output_idx, std::vector<int64_t>* dims);
tensorrt::TrtLogger logger_;
int max_batch_size_;
std::vector<nvinfer1::Dims> nv_dims_;
std::vector<bool> is_input_;
std::unordered_map<int, std::vector<TIndex>> output_size_hints_;
std::unordered_map<int, std::vector<int64_t>> output_size_hints_;
std::shared_ptr<nvinfer1::ICudaEngine> trt_engine_{nullptr};
std::shared_ptr<nvinfer1::IExecutionContext> trt_executor_{nullptr};
bool batch_warning_issued_{false};

View File

@ -139,7 +139,7 @@ void TensorSerializer::SerializeWithChunkSize(
// Serialize whole vector. If vector is empty, its shape still needs to be
// serialized in empty proto
for (size_t chunkBegin = 0;
chunkBegin < std::max(tensor.size(), static_cast<TIndex>(1));
chunkBegin < std::max(tensor.size(), static_cast<int64_t>(1));
chunkBegin += chunk_size) {
VLOG(2) << "Starting a chunk at " << chunkBegin;
#ifndef __ANDROID__
@ -374,8 +374,8 @@ void TensorDeserializer::Deserialize(const TensorProto& proto, Tensor* tensor) {
tensor->GetStaticContext()->CreateContext(proto.device_detail());
auto context = uniq_ptr.get();
context->SwitchToDevice(0);
vector<TIndex> dims;
for (const TIndex d : proto.dims()) {
vector<int64_t> dims;
for (const int64_t d : proto.dims()) {
dims.push_back(d);
}
tensor->Resize(dims);

View File

@ -557,9 +557,9 @@ TEST(TensorTest, TensorNonFundamentalTypeClone) {
TEST(TensorTest, Tensor64BitDimension) {
// Initialize a large tensor.
TIndex large_number =
int64_t large_number =
static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
Tensor tensor(vector<TIndex>{large_number}, CPU);
Tensor tensor(vector<int64_t>{large_number}, CPU);
EXPECT_EQ(tensor.ndim(), 1);
EXPECT_EQ(tensor.dim(0), large_number);
EXPECT_EQ(tensor.size(), large_number);
@ -589,9 +589,9 @@ TEST(TensorTest, Tensor64BitDimension) {
}
TEST(TensorDeathTest, CannotCastDownLargeDims) {
TIndex large_number =
int64_t large_number =
static_cast<int64_t>(std::numeric_limits<int>::max()) + 1;
Tensor tensor(vector<TIndex>{large_number}, CPU);
Tensor tensor(vector<int64_t>{large_number}, CPU);
EXPECT_EQ(tensor.ndim(), 1);
EXPECT_EQ(tensor.dim(0), large_number);
ASSERT_THROW(tensor.dim32(0), EnforceNotMet);
@ -694,7 +694,7 @@ TEST(TensorTest, TensorSerialization_CustomType) {
}
TEST(TensorTest, Half) {
const TIndex kSize = 3000000;
const int64_t kSize = 3000000;
Blob blob;
TensorCPU* tensor = blob.GetMutableTensor(CPU);
tensor->Resize(kSize);

View File

@ -145,7 +145,7 @@ using EnforceNotMet = at::Error;
* functions to caffe2::enforce_detail namespace. For example:
*
* namespace caffe2 { namespace enforce_detail {
* inline EnforceFailMessage IsVector(const vector<TIndex>& shape) {
* inline EnforceFailMessage IsVector(const vector<int64_t>& shape) {
* if (shape.size() == 1) { return EnforceOK(); }
* return MakeString("Shape ", shape, " is not a vector");
* }

View File

@ -581,7 +581,7 @@ TensorShapes InferBlobShapesAndTypesFromWorkspace(
}
TensorShapes InferBlobShapesAndTypesFromMap(
const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
const vector<NetDef*>& nets) {
CaffeMap<string, TensorShape> blob_desc;
// Populate shapes from known blobs
@ -597,7 +597,7 @@ TensorShapes InferBlobShapesAndTypesFromMap(
}
TensorShapes InferBlobShapesAndTypesFromMap(
const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
const CaffeMap<std::string, TensorProto_DataType>& blob_types,
const vector<NetDef*>& nets) {
CaffeMap<string, TensorShape> blob_desc;

View File

@ -700,7 +700,7 @@ struct DispatchHelper<FixedValues<FirstVal, Values...>, ExtraArgs...> {
template <typename... ExtraArgs>
struct DispatchHelper<FixedValues<>, ExtraArgs...> {
template <typename Op>
static bool call(Op* op, TIndex /*size*/) {
static bool call(Op* op, int64_t /*size*/) {
return op->template DoRunWithValue<ExtraArgs..., -1>();
}
};
@ -973,11 +973,11 @@ CAFFE2_API TensorShapes InferBlobShapesAndTypesFromWorkspace(
const vector<NetDef*>& nets);
CAFFE2_API TensorShapes InferBlobShapesAndTypesFromMap(
const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
const vector<NetDef*>& nets);
CAFFE2_API TensorShapes InferBlobShapesAndTypesFromMap(
const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
const CaffeMap<std::string, TensorProto_DataType>& blob_types,
const vector<NetDef*>& nets);

View File

@ -331,7 +331,7 @@ int OpSchema::CalculateOutput(int num_input) const {
}
static void SparseLengthsFillerHelper(
const std::vector<std::vector<TIndex>>& shapes,
const std::vector<std::vector<int64_t>>& shapes,
size_t value_index,
size_t length_index,
std::vector<TensorFiller>* fillers) {
@ -341,7 +341,7 @@ static void SparseLengthsFillerHelper(
}
static void SparseSegmentsFillerHelper(
const std::vector<std::vector<TIndex>>& shapes,
const std::vector<std::vector<int64_t>>& shapes,
size_t value_index,
size_t segment_index,
std::vector<TensorFiller>* fillers) {
@ -364,7 +364,7 @@ OpSchema& OpSchema::ValueKeyLengthInputFillers(
size_t key_index,
size_t length_index) {
filler_supplier_ = [this, value_index, key_index, length_index](
const std::vector<std::vector<TIndex>>& shapes) {
const std::vector<std::vector<int64_t>>& shapes) {
auto fillers = SupplyDenseFillers(shapes);
// fill in the length (value_index is used to get the correct shape)
SparseLengthsFillerHelper(shapes, key_index, length_index, &fillers);
@ -383,7 +383,7 @@ OpSchema& OpSchema::ValueLengthInputFillers(
size_t value_index,
size_t length_index) {
filler_supplier_ = [this, value_index, length_index](
const std::vector<std::vector<TIndex>>& shapes) {
const std::vector<std::vector<int64_t>>& shapes) {
auto fillers = SupplyDenseFillers(shapes);
// fill in the length (value_index is used to get the correct shape)
SparseLengthsFillerHelper(shapes, value_index, length_index, &fillers);
@ -394,7 +394,7 @@ OpSchema& OpSchema::ValueLengthInputFillers(
OpSchema& OpSchema::DisallowInputFillers() {
filler_supplier_ =
[this](const std::vector<std::vector<TIndex>>& /* unused */) {
[this](const std::vector<std::vector<int64_t>>& /* unused */) {
throw std::invalid_argument(type_ + " does not have input fillers");
return std::vector<TensorFiller>();
};
@ -402,12 +402,12 @@ OpSchema& OpSchema::DisallowInputFillers() {
}
std::vector<TensorFiller> OpSchema::InputFillers(
const std::vector<std::vector<TIndex>>& shapes) const {
const std::vector<std::vector<int64_t>>& shapes) const {
return filler_supplier_(shapes);
}
std::vector<TensorFiller> OpSchema::SupplyDenseFillers(
const std::vector<std::vector<TIndex>>& shapes) {
const std::vector<std::vector<int64_t>>& shapes) {
std::vector<TensorFiller> fillers;
for (const auto& shape : shapes) {
fillers.emplace_back(shape);

View File

@ -383,11 +383,11 @@ class CAFFE2_API OpSchema {
OpSchema& DisallowInputFillers();
std::vector<TensorFiller> InputFillers(
const std::vector<std::vector<TIndex>>& shapes) const;
const std::vector<std::vector<int64_t>>& shapes) const;
private:
std::vector<TensorFiller> SupplyDenseFillers(
const std::vector<std::vector<TIndex>>& shapes);
const std::vector<std::vector<int64_t>>& shapes);
private:
string type_;
@ -438,9 +438,9 @@ class CAFFE2_API OpSchema {
};
std::function<std::vector<TensorFiller>(
const std::vector<std::vector<TIndex>>&)>
const std::vector<std::vector<int64_t>>&)>
filler_supplier_ =
[this](const std::vector<std::vector<TIndex>>& shapes) {
[this](const std::vector<std::vector<int64_t>>& shapes) {
return SupplyDenseFillers(shapes);
};
};
@ -508,8 +508,8 @@ inline TensorShape CreateTensorShape(
}
// Helper function
inline vector<TIndex> GetDimsVector(const TensorShape& shape) {
vector<TIndex> dims;
inline vector<int64_t> GetDimsVector(const TensorShape& shape) {
vector<int64_t> dims;
for (auto d : shape.dims()) {
dims.push_back(d);
}

View File

@ -212,8 +212,8 @@ class CAFFE2_EXPORT QTensor {
/**
* Return product of all dimensions starting from K.
*/
inline TIndex size_from_dim(int k) const {
TIndex r = 1;
inline int64_t size_from_dim(int k) const {
int64_t r = 1;
for (int i = k; i < dims_.size(); ++i) {
r *= dims_[i];
}
@ -223,9 +223,9 @@ class CAFFE2_EXPORT QTensor {
/**
* Product of all dims up to.
*/
inline TIndex size_to_dim(int k) const {
inline int64_t size_to_dim(int k) const {
CAFFE_ENFORCE(k < dims_.size());
TIndex r = 1;
int64_t r = 1;
for (int i = 0; i < k; ++i) {
r *= dims_[i];
}

View File

@ -77,7 +77,7 @@ void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c) {
int GetGPUIDForPointer(const void* ptr);
vector<TIndex> GetTensorInfo(
vector<int64_t> GetTensorInfo(
const void* c,
size_t* capacity,
DeviceOption* device) {

View File

@ -59,7 +59,7 @@ class CAFFE2_API Tensor final {
* Note that the actual data allocation is not going to be carried out until
* the first time mutable_data() is called.
*/
explicit Tensor(const vector<TIndex>& dims, DeviceType type)
explicit Tensor(const vector<int64_t>& dims, DeviceType type)
: Tensor(Storage(type)) {
// TODO: here, we create a Storage
// and immediately discard it in Resize() since
@ -96,7 +96,7 @@ class CAFFE2_API Tensor final {
*/
template <typename T>
Tensor(
const vector<TIndex>& dims,
const vector<int64_t>& dims,
const vector<T>& values,
BaseContext* context)
: Tensor(Storage(context->device_type(), TypeMeta::Make<T>())) {
@ -115,7 +115,7 @@ class CAFFE2_API Tensor final {
typename = typename std::enable_if<std::is_scalar<T>::value>::type>
Tensor(const T& value, BaseContext* context)
: Tensor(Storage(context->device_type(), TypeMeta::Make<T>())) {
Resize(std::vector<TIndex>{});
Resize(std::vector<int64_t>{});
context->CopyItemsFromCPU(
storage().dtype(), size(), &value, mutable_data<T>());
}
@ -142,15 +142,15 @@ class CAFFE2_API Tensor final {
impl_.get()->CopyFrom(*src.impl_.get(), context);
}
void ExtendTo(TIndex num, float growthPct, BaseContext* context) const {
void ExtendTo(int64_t num, float growthPct, BaseContext* context) const {
impl_.get()->ExtendTo(num, growthPct, context);
}
void Extend(TIndex num, float growthPct, BaseContext* context) const {
void Extend(int64_t num, float growthPct, BaseContext* context) const {
impl_.get()->Extend(num, growthPct, context);
}
void ShrinkTo(TIndex outer_dim) const {
void ShrinkTo(int64_t outer_dim) const {
impl_.get()->ShrinkTo(outer_dim);
}
@ -168,7 +168,7 @@ class CAFFE2_API Tensor final {
impl_.get()->ResizeLike(*src_tensor.impl_.get());
}
inline void Reshape(const vector<TIndex>& dims) const {
inline void Reshape(const vector<int64_t>& dims) const {
impl_.get()->Reshape(dims);
}
@ -250,7 +250,7 @@ class CAFFE2_API Tensor final {
return impl_.get()->ndim();
}
inline TIndex size() const {
inline int64_t size() const {
return impl_.get()->size();
}
@ -266,19 +266,19 @@ class CAFFE2_API Tensor final {
return impl_.get()->capacity_nbytes();
}
inline const vector<TIndex>& dims() const {
inline const vector<int64_t>& dims() const {
return impl_.get()->dims();
}
inline TIndex size_from_dim(int k) const {
inline int64_t size_from_dim(int k) const {
return impl_.get()->size_from_dim(k);
}
inline TIndex size_to_dim(int k) const {
inline int64_t size_to_dim(int k) const {
return impl_.get()->size_to_dim(k);
}
inline TIndex size_between_dim(int k, int l) const {
inline int64_t size_between_dim(int k, int l) const {
return impl_.get()->size_between_dim(k, l);
}
@ -311,7 +311,7 @@ class CAFFE2_API Tensor final {
return impl_.get()->dim32(i);
}
inline TIndex dim(const int i) const {
inline int64_t dim(const int i) const {
return impl_.get()->dim(i);
}
@ -337,7 +337,7 @@ TypeCall GetTypeCallFunction(TypeIdentifier id);
void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c);
// Shape call registry
typedef vector<TIndex> (*TensorInfoCall)(
typedef vector<int64_t> (*TensorInfoCall)(
const void*,
size_t* capacity,
DeviceOption* device);
@ -377,7 +377,7 @@ void TensorPrinter::Print(const Tensor& tensor) {
std::stringstream values_stream;
// One most likely doesn't want to print int64-number of items for visual
// inspection, so we cast down to int here.
int total_count = static_cast<int>(std::min(tensor.size(), TIndex(limit_)));
int total_count = static_cast<int>(std::min(tensor.size(), int64_t(limit_)));
const T* tensor_data = tensor.template data<T>();
for (int i = 0; i < total_count - 1; ++i) {
values_stream << tensor_data[i] << ",";

View File

@ -26,17 +26,17 @@ namespace caffe2 {
class DeviceOption;
/**
* A utility function to convert vector<int> to vector<TIndex>.
* A utility function to convert vector<int> to vector<int64_t>.
*/
inline std::vector<TIndex> ToVectorTIndex(const std::vector<int>& src) {
return std::vector<TIndex>(src.begin(), src.end());
inline std::vector<int64_t> ToVectorint64_t(const std::vector<int>& src) {
return std::vector<int64_t>(src.begin(), src.end());
}
/**
* Return product of all dimensions starting from k
*/
inline TIndex size_from_dim_(int k, const std::vector<TIndex>& dims) {
TIndex r = 1;
inline int64_t size_from_dim_(int k, const std::vector<int64_t>& dims) {
int64_t r = 1;
for (size_t i = k; i < dims.size(); ++i) {
r *= dims[i];
}
@ -44,9 +44,9 @@ inline TIndex size_from_dim_(int k, const std::vector<TIndex>& dims) {
}
// Product of all dims up to k (not including dims[k])
inline TIndex size_to_dim_(int k, const std::vector<TIndex>& dims) {
inline int64_t size_to_dim_(int k, const std::vector<int64_t>& dims) {
CAFFE_ENFORCE((unsigned)k <= dims.size());
TIndex r = 1;
int64_t r = 1;
for (int i = 0; i < k; ++i) {
r *= dims[i];
}
@ -54,9 +54,9 @@ inline TIndex size_to_dim_(int k, const std::vector<TIndex>& dims) {
}
// Product of all dims between k and l (not including dims[k] and dims[l])
inline TIndex size_between_dim_(int k, int l, const std::vector<TIndex>& dims) {
inline int64_t size_between_dim_(int k, int l, const std::vector<int64_t>& dims) {
CAFFE_ENFORCE((unsigned)l < dims.size());
TIndex r = 1;
int64_t r = 1;
if (k < l) {
for (int i = k + 1; i < l; ++i) {
r *= dims[i];
@ -191,7 +191,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
* @brief Extend the outer-most dimension of this tensor
* to dimension of `num`.
*/
void ExtendTo(TIndex num, float growthPct, at::BaseContext* context) {
void ExtendTo(int64_t num, float growthPct, at::BaseContext* context) {
CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
CAFFE_ENFORCE_GE_WITH_CALLER(growthPct, 0);
CAFFE_ENFORCE(context != nullptr, "Context must be provided.");
@ -207,7 +207,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
* growthPct. This ensures that Extend runs on an amortized O(1) time
* complexity.
*/
void Extend(TIndex num, float growthPct, at::BaseContext* context) {
void Extend(int64_t num, float growthPct, at::BaseContext* context) {
CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
CAFFE_ENFORCE_GE_WITH_CALLER(
num, 0, "`num` must be non-negative for Extend");
@ -223,8 +223,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
auto newNumel = std::accumulate(
newDims.begin(),
newDims.end(),
static_cast<TIndex>(1),
std::multiplies<TIndex>());
static_cast<int64_t>(1),
std::multiplies<int64_t>());
if (newNumel * storage_.itemsize() <= storage_.capacity()) {
dims_ = newDims;
numel_ = newNumel;
@ -253,7 +253,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
* This method guarantees that no re-allocations are carried out, which means
that the extra capacity after the end of the shrunk tensor is maintained.
*/
void ShrinkTo(TIndex outer_dim) {
void ShrinkTo(int64_t outer_dim) {
CAFFE_ENFORCE_WITH_CALLER(
is_contiguous_,
"Right now ShrinkTo is only supported on contiguous Tensor.");
@ -268,8 +268,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
numel_ = std::accumulate(
dims_.begin(),
dims_.end(),
static_cast<TIndex>(1),
std::multiplies<TIndex>());
static_cast<int64_t>(1),
std::multiplies<int64_t>());
}
/**
@ -292,8 +292,8 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
auto newNumel = std::accumulate(
newCapacity.begin(),
newCapacity.end(),
static_cast<TIndex>(1),
std::multiplies<TIndex>());
static_cast<int64_t>(1),
std::multiplies<int64_t>());
if (newNumel * storage_.itemsize() <= storage_.capacity()) {
return;
}
@ -365,11 +365,11 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
* Resizes the tensor without touching underlying storage.
This requires the total size of the tensor to remain constant.
*/
inline void Reshape(const std::vector<TIndex>& dims) {
inline void Reshape(const std::vector<int64_t>& dims) {
CAFFE_ENFORCE_WITH_CALLER(
is_contiguous_,
"Right now Reshape is only supported for contiguous Tensor.");
TIndex new_size = 1;
int64_t new_size = 1;
for (auto d : dims) {
CAFFE_ENFORCE_GE_WITH_CALLER(d, 0);
new_size *= d;
@ -387,7 +387,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
}
inline void Reshape(const std::vector<int>& dims) {
Reshape(ToVectorTIndex(dims));
Reshape(ToVectorint64_t(dims));
}
/**
@ -674,7 +674,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
/**
* Returns the size (i.e. the number of items) of the tensor.
*/
inline TIndex size() const {
inline int64_t size() const {
return numel_;
}
/**
@ -701,19 +701,19 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
/**
* Returns the dimensions of the tensor as a vector.
*/
inline const std::vector<TIndex>& dims() const {
inline const std::vector<int64_t>& dims() const {
return dims_;
}
inline TIndex size_from_dim(int k) const {
inline int64_t size_from_dim(int k) const {
return size_from_dim_(k, dims_);
}
inline TIndex size_to_dim(int k) const {
inline int64_t size_to_dim(int k) const {
return size_to_dim_(k, dims_);
}
inline TIndex size_between_dim(int k, int l) const {
inline int64_t size_between_dim(int k, int l) const {
return size_between_dim_(k, l, dims_);
}
@ -772,7 +772,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
/**
* Returns the i-th dimension of the tensor in int.
*
* This function returns an int value instead of TIndex, which depending on
* This function returns an int value instead of int64_t, which depending on
* the typedef could be int64. If you want int64 dim values, make sure you
* call dim() instead.
*/
@ -790,7 +790,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
* must be between 0 (inclusive) and the number of dimensions, otherwise
* this function will produce a fatal message.
*/
inline TIndex dim(const int i) const {
inline int64_t dim(const int i) const {
#ifndef NDEBUG
CAFFE_ENFORCE_LT_WITH_CALLER(i, dims_.size(), "Exceeding ndim limit");
CAFFE_ENFORCE_GE_WITH_CALLER(i, 0, "Cannot have negative dimension index");
@ -818,9 +818,9 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
protected:
// TODO: change to DimVector
std::vector<TIndex> dims_; // sizes_
std::vector<int64_t> dims_; // sizes_
at::DimVector strides_;
TIndex numel_ = -1; // numel_
int64_t numel_ = -1; // numel_
bool is_contiguous_ = true;
// we decide to keep reserved_ and it will
// live in Tensor after the split
@ -838,7 +838,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
bool SetDims(const std::vector<T>& src) {
auto old_numel = numel_;
dims_.resize(src.size());
TIndex new_numel = 1;
int64_t new_numel = 1;
for (size_t i = 0; i < src.size(); ++i) {
new_numel *= src[i];
dims_[i] = src[i];
@ -859,7 +859,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
// TODO(jiayq): maybe rewrite the following functions with initializer list.
// NVCC does not play well with initializer lists last time, but worth
// another shot.
bool SetDims(const TIndex d0) {
bool SetDims(const int64_t d0) {
auto old_numel = numel_;
dims_.resize(1);
dims_[0] = d0;
@ -868,7 +868,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
return numel_ != old_numel;
}
bool SetDims(const TIndex d0, const TIndex d1) {
bool SetDims(const int64_t d0, const int64_t d1) {
auto old_numel = numel_;
dims_.resize(2);
dims_[0] = d0;
@ -878,7 +878,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
return numel_ != old_numel;
}
bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) {
bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) {
auto old_numel = numel_;
dims_.resize(3);
dims_[0] = d0;
@ -890,7 +890,7 @@ class CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
}
bool
SetDims(const TIndex d0, const TIndex d1, const TIndex d2, const TIndex d3) {
SetDims(const int64_t d0, const int64_t d1, const int64_t d2, const int64_t d3) {
auto old_numel = numel_;
dims_.resize(4);
dims_[0] = d0;
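
// A standalone sketch (not part of this commit) of what the dim-product
// helpers shown earlier in this file (size_from_dim_, size_to_dim_,
// size_between_dim_) compute, using the post-rename int64_t spelling.
// For a shape {2, 3, 4, 5}: size_from_dim_(1) = 3*4*5 = 60 (inner size),
// size_to_dim_(2) = 2*3 = 6 (outer size), size_between_dim_(0, 3) = 3*4 = 12.
#include <cstdint>
#include <iostream>
#include <vector>

static int64_t prod(const std::vector<int64_t>& dims, size_t begin, size_t end) {
  int64_t r = 1;
  for (size_t i = begin; i < end && i < dims.size(); ++i) {
    r *= dims[i];
  }
  return r;
}

int main() {
  const std::vector<int64_t> dims{2, 3, 4, 5};
  std::cout << prod(dims, 1, dims.size()) << "\n"; // size_from_dim_(1)       -> 60
  std::cout << prod(dims, 0, 2) << "\n";           // size_to_dim_(2)         -> 6
  std::cout << prod(dims, 1, 3) << "\n";           // size_between_dim_(0, 3) -> 12
  return 0;
}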

View File

@ -232,7 +232,7 @@ class MaxPoolRTCOp final : public ConvPoolOpBase<CUDAContext> {
private:
MaxPoolRTCFunction func_;
vector<TIndex> input_dims_;
vector<int64_t> input_dims_;
};
class MaxPoolGradientRTCOp final : public ConvPoolOpBase<CUDAContext> {
@ -285,7 +285,7 @@ class MaxPoolGradientRTCOp final : public ConvPoolOpBase<CUDAContext> {
private:
MaxPoolGradientRTCFunction func_;
vector<TIndex> input_dims_;
vector<int64_t> input_dims_;
};
namespace {

View File

@ -29,8 +29,8 @@ namespace caffe2 {
using Shape = std::array<int, N>;
template<int N>
const std::vector<TIndex>& shape(Shape<N> vs) {
static thread_local std::vector<TIndex> cache;
const std::vector<int64_t>& shape(Shape<N> vs) {
static thread_local std::vector<int64_t> cache;
cache.resize(vs.size());
for (auto i = 0; i < vs.size(); ++i) {
cache[i] = vs[i];
@ -38,11 +38,11 @@ namespace caffe2 {
return cache;
}
inline const std::vector<TIndex>& shape(int i) {
inline const std::vector<int64_t>& shape(int i) {
return shape<1>(Shape<1>({i}));
}
inline const std::vector<TIndex>& shape(int i, int j) {
inline const std::vector<int64_t>& shape(int i, int j) {
return shape<2>(Shape<2>({i, j}));
}
@ -177,7 +177,7 @@ namespace caffe2 {
Y->template mutable_data<T>(), &context_);
if (OutputSize() == 2){
auto* Comp_rate = Output(1);
Comp_rate->Resize(vector<TIndex>());
Comp_rate->Resize(vector<int64_t>());
T* comp_data = Comp_rate->template mutable_data<T>();
math::Sum<T, Context>(
Mask.size(), Mask.template data<T>(), comp_data, &context_);
@ -262,7 +262,7 @@ namespace caffe2 {
0, dW->template mutable_data<T>(),
&context_);
comp_r_buf_.Resize(vector<TIndex>());
comp_r_buf_.Resize(vector<int64_t>());
T* comp_data = comp_r_buf_.template mutable_data<T>();
math::Sum<T, Context>(
Mask.size(), Mask.template data<T>(), comp_data, &context_);

View File

@ -32,8 +32,8 @@ template<int N>
using Shape = std::array<int, N>;
template<int N>
const std::vector<TIndex>& shape(Shape<N> vs) {
static thread_local std::vector<TIndex> cache;
const std::vector<int64_t>& shape(Shape<N> vs) {
static thread_local std::vector<int64_t> cache;
cache.resize(vs.size());
for (auto i = 0; i < vs.size(); ++i) {
cache[i] = vs[i];
@ -41,11 +41,11 @@ const std::vector<TIndex>& shape(Shape<N> vs) {
return cache;
}
inline const std::vector<TIndex>& shape(int i) {
inline const std::vector<int64_t>& shape(int i) {
return shape<1>(Shape<1>({i}));
}
inline const std::vector<TIndex>& shape(int i, int j) {
inline const std::vector<int64_t>& shape(int i, int j) {
return shape<2>(Shape<2>({i, j}));
}

View File

@ -37,9 +37,9 @@ class FunHashOp : public Operator<Context> {
FunHashOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
num_outputs_(
OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
num_segments_(
OperatorBase::GetSingleArgument<TIndex>("num_segments", -1)),
OperatorBase::GetSingleArgument<int64_t>("num_segments", -1)),
seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
CAFFE_ENFORCE(
OperatorBase::HasArgument("num_outputs"),
@ -54,7 +54,7 @@ class FunHashOp : public Operator<Context> {
const auto& seg = Input(2);
const auto& weight = Input(3);
TIndex num_alpha = 1;
int64_t num_alpha = 1;
if (adaptive_) {
const auto& alpha = Input(4);
num_alpha = alpha.dim(0);
@ -62,12 +62,12 @@ class FunHashOp : public Operator<Context> {
const auto* seg_data = seg.template data<int>();
TIndex num_weight = weight.dim(0);
TIndex num_nz_ent = seg.dim(0);
int64_t num_weight = weight.dim(0);
int64_t num_nz_ent = seg.dim(0);
TIndex n_segments = num_segments_;
int64_t n_segments = num_segments_;
if (num_segments_ == -1) {
for (TIndex i = 0; i < num_nz_ent; ++i) {
for (int64_t i = 0; i < num_nz_ent; ++i) {
if (seg_data[i] > n_segments) {
n_segments = seg_data[i];
}
@ -85,16 +85,16 @@ class FunHashOp : public Operator<Context> {
const auto* weight_data = weight.template data<T>();
const auto* alpha_data = adaptive_ ? Input(4).template data<T>() : 0;
const auto* val_data = val.template data<T>();
const auto* key_data = key.template data<TIndex>();
const auto* key_data = key.template data<int64_t>();
for (TIndex j = 0; j < num_nz_ent; ++j) {
TIndex cur_seg = seg_data[j];
TIndex cur_key = key_data[j];
for (int64_t j = 0; j < num_nz_ent; ++j) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
TIndex output_stride = cur_seg * num_outputs_;
for (TIndex i = 0; i < num_outputs_; ++i) {
int64_t output_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
T sum = 0;
for (TIndex k = 0; k < num_alpha; ++k) {
for (int64_t k = 0; k < num_alpha; ++k) {
uint64_t hash;
// The hash function takes as input four integers:
// 1. feature index
@ -108,7 +108,7 @@ class FunHashOp : public Operator<Context> {
hash_data[3] = INDEX_MAGIC;
hash = XXH64(hash_data.data(), hash_data.size(), seed_);
TIndex index = hash % num_weight;
int64_t index = hash % num_weight;
T cur_weight = weight_data[index];
#ifdef USE_SIGN
@ -133,8 +133,8 @@ class FunHashOp : public Operator<Context> {
}
protected:
TIndex num_outputs_;
TIndex num_segments_;
int64_t num_outputs_;
int64_t num_segments_;
uint64_t seed_;
std::array<uint64_t, 4> hash_data;
bool adaptive_;
@ -147,7 +147,7 @@ class FunHashGradientOp : public Operator<Context> {
FunHashGradientOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
num_outputs_(
OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
adaptive_ = (InputSize() == 6);
}
@ -159,7 +159,7 @@ class FunHashGradientOp : public Operator<Context> {
const auto& seg = Input(3);
const auto& weight = Input(4);
TIndex num_alpha = 1;
int64_t num_alpha = 1;
T* grad_alpha_data = 0;
if (adaptive_) {
@ -173,8 +173,8 @@ class FunHashGradientOp : public Operator<Context> {
const auto* seg_data = seg.template data<int>();
TIndex num_weight = weight.dim(0);
TIndex num_nz_ent = seg.dim(0);
int64_t num_weight = weight.dim(0);
int64_t num_nz_ent = seg.dim(0);
auto* grad_weight = Output(0);
grad_weight->ResizeLike(weight);
@ -184,18 +184,18 @@ class FunHashGradientOp : public Operator<Context> {
const auto* weight_data = weight.template data<T>();
const auto* alpha_data = adaptive_ ? Input(5).template data<T>() : 0;
const auto* val_data = val.template data<T>();
const auto* key_data = key.template data<TIndex>();
const auto* key_data = key.template data<int64_t>();
memset(grad_weight_data, 0, sizeof(T) * num_weight);
for (TIndex j = 0; j < num_nz_ent; ++j) {
TIndex cur_seg = seg_data[j];
TIndex cur_key = key_data[j];
for (int64_t j = 0; j < num_nz_ent; ++j) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
TIndex grad_out_stride = cur_seg * num_outputs_;
for (TIndex i = 0; i < num_outputs_; ++i) {
int64_t grad_out_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val;
for (TIndex k = 0; k < num_alpha; ++k) {
for (int64_t k = 0; k < num_alpha; ++k) {
uint64_t hash;
hash_data[0] = cur_key;
hash_data[1] = i;
@ -203,7 +203,7 @@ class FunHashGradientOp : public Operator<Context> {
hash_data[3] = INDEX_MAGIC;
hash = XXH64(hash_data.data(), hash_data.size(), seed_);
TIndex index = hash % num_weight;
int64_t index = hash % num_weight;
T cur_grad_out_scale = grad_out_scale;
#ifdef USE_SIGN
@ -227,7 +227,7 @@ class FunHashGradientOp : public Operator<Context> {
}
protected:
TIndex num_outputs_;
int64_t num_outputs_;
uint64_t seed_;
std::array<uint64_t, 4> hash_data;
bool adaptive_;

View File

@ -36,9 +36,9 @@ class SparseFunHashOp : public Operator<Context> {
SparseFunHashOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
num_outputs_(
OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
num_segments_(
OperatorBase::GetSingleArgument<TIndex>("num_segments", -1)),
OperatorBase::GetSingleArgument<int64_t>("num_segments", -1)),
seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
CAFFE_ENFORCE(
OperatorBase::HasArgument("num_outputs"),
@ -53,7 +53,7 @@ class SparseFunHashOp : public Operator<Context> {
const auto& seg = Input(2);
const auto& weight = Input(3);
TIndex num_alpha = 1;
int64_t num_alpha = 1;
if (adaptive_) {
const auto& alpha = Input(4);
num_alpha = alpha.dim(0);
@ -61,12 +61,12 @@ class SparseFunHashOp : public Operator<Context> {
const auto* seg_data = seg.template data<int>();
TIndex num_weight = weight.dim(0);
TIndex num_nz_ent = seg.dim(0);
int64_t num_weight = weight.dim(0);
int64_t num_nz_ent = seg.dim(0);
TIndex n_segments = num_segments_;
int64_t n_segments = num_segments_;
if (num_segments_ == -1) {
for (TIndex i = 0; i < num_nz_ent; ++i) {
for (int64_t i = 0; i < num_nz_ent; ++i) {
if (seg_data[i] > n_segments) {
n_segments = seg_data[i];
}
@ -84,16 +84,16 @@ class SparseFunHashOp : public Operator<Context> {
const auto* weight_data = weight.template data<T>();
const auto* alpha_data = adaptive_ ? Input(4).template data<T>() : 0;
const auto* val_data = val.template data<T>();
const auto* key_data = key.template data<TIndex>();
const auto* key_data = key.template data<int64_t>();
for (TIndex j = 0; j < num_nz_ent; ++j) {
TIndex cur_seg = seg_data[j];
TIndex cur_key = key_data[j];
for (int64_t j = 0; j < num_nz_ent; ++j) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
TIndex output_stride = cur_seg * num_outputs_;
for (TIndex i = 0; i < num_outputs_; ++i) {
int64_t output_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
T sum = 0;
for (TIndex k = 0; k < num_alpha; ++k) {
for (int64_t k = 0; k < num_alpha; ++k) {
// The hash function takes as input three integers:
// 1. feature index
// 2. output index
@ -108,13 +108,13 @@ class SparseFunHashOp : public Operator<Context> {
#ifdef USE_SIGN
// Use the least significant bit for sign, the rest for weights.
TIndex index = (hash >> 1) % num_weight;
int64_t index = (hash >> 1) % num_weight;
T cur_weight = weight_data[index];
if (hash & 1) {
cur_weight = -cur_weight;
}
#else
TIndex index = hash % num_weight;
int64_t index = hash % num_weight;
T cur_weight = weight_data[index];
#endif
@ -132,8 +132,8 @@ class SparseFunHashOp : public Operator<Context> {
}
protected:
TIndex num_outputs_;
TIndex num_segments_;
int64_t num_outputs_;
int64_t num_segments_;
uint64_t seed_;
std::array<uint64_t, 4> hash_data;
bool adaptive_;
@ -146,7 +146,7 @@ class SparseFunHashGradientOp : public Operator<Context> {
SparseFunHashGradientOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
num_outputs_(
OperatorBase::GetSingleArgument<TIndex>("num_outputs", -1)),
OperatorBase::GetSingleArgument<int64_t>("num_outputs", -1)),
seed_(OperatorBase::GetSingleArgument<uint64_t>("seed", 0)) {
adaptive_ = (InputSize() == 6);
}
@ -158,7 +158,7 @@ class SparseFunHashGradientOp : public Operator<Context> {
const auto& seg = Input(3);
const auto& weight = Input(4);
TIndex num_alpha = 1;
int64_t num_alpha = 1;
T* grad_alpha_data = 0;
if (adaptive_) {
@ -172,10 +172,10 @@ class SparseFunHashGradientOp : public Operator<Context> {
const auto* seg_data = seg.template data<int>();
TIndex num_weight = weight.dim(0);
TIndex num_nz_ent = seg.dim(0);
int64_t num_weight = weight.dim(0);
int64_t num_nz_ent = seg.dim(0);
TIndex grad_weight_size = num_nz_ent * num_outputs_ * num_alpha;
int64_t grad_weight_size = num_nz_ent * num_outputs_ * num_alpha;
auto* grad_weight_val = Output(0);
grad_weight_val->Resize(grad_weight_size);
T* grad_weight_val_data = grad_weight_val->template mutable_data<T>();
@ -183,23 +183,23 @@ class SparseFunHashGradientOp : public Operator<Context> {
auto* grad_weight_ind = Output(1);
grad_weight_ind->Resize(grad_weight_size);
auto* grad_weight_ind_data =
grad_weight_ind->template mutable_data<TIndex>();
grad_weight_ind->template mutable_data<int64_t>();
const auto* grad_out_data = grad_out.template data<T>();
const auto* weight_data = weight.template data<T>();
const auto* alpha_data = adaptive_ ? Input(5).template data<T>() : 0;
const auto* val_data = val.template data<T>();
const auto* key_data = key.template data<TIndex>();
const auto* key_data = key.template data<int64_t>();
TIndex w_ind = 0;
for (TIndex j = 0; j < num_nz_ent; ++j) {
TIndex cur_seg = seg_data[j];
TIndex cur_key = key_data[j];
int64_t w_ind = 0;
for (int64_t j = 0; j < num_nz_ent; ++j) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
TIndex grad_out_stride = cur_seg * num_outputs_;
for (TIndex i = 0; i < num_outputs_; ++i) {
int64_t grad_out_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val;
for (TIndex k = 0; k < num_alpha; ++k) {
for (int64_t k = 0; k < num_alpha; ++k) {
hash_data[0] = cur_key;
hash_data[1] = i;
hash_data[2] = k;
@ -209,12 +209,12 @@ class SparseFunHashGradientOp : public Operator<Context> {
T cur_grad_out_scale = grad_out_scale;
#ifdef USE_SIGN
TIndex index = (hash >> 1) % num_weight;
int64_t index = (hash >> 1) % num_weight;
if (hash & 1) {
cur_grad_out_scale = -cur_grad_out_scale;
}
#else
TIndex index = hash % num_weight;
int64_t index = hash % num_weight;
#endif
if (adaptive_) {
@ -232,7 +232,7 @@ class SparseFunHashGradientOp : public Operator<Context> {
}
protected:
TIndex num_outputs_;
int64_t num_outputs_;
uint64_t seed_;
std::array<uint64_t, 4> hash_data;
bool adaptive_;

View File

@ -36,10 +36,10 @@ class SparseMatrixReshapeOp : public Operator<Context> {
OperatorBase::HasArgument("new_shape"),
"Argument `new_shape` is missing.");
vector<TIndex> old_shape =
OperatorBase::GetRepeatedArgument<TIndex>("old_shape");
vector<TIndex> new_shape =
OperatorBase::GetRepeatedArgument<TIndex>("new_shape");
vector<int64_t> old_shape =
OperatorBase::GetRepeatedArgument<int64_t>("old_shape");
vector<int64_t> new_shape =
OperatorBase::GetRepeatedArgument<int64_t>("new_shape");
CAFFE_ENFORCE(
old_shape.size() == 2,
@ -63,7 +63,7 @@ class SparseMatrixReshapeOp : public Operator<Context> {
old_shape[0] > 0,
"The first dimension in `old_shape` must be positive.");
TIndex matrix_size = old_shape[0] * old_shape[1];
int64_t matrix_size = old_shape[0] * old_shape[1];
if (new_shape[0] == -1) {
CAFFE_ENFORCE(
@ -106,14 +106,14 @@ class SparseMatrixReshapeOp : public Operator<Context> {
new_col->Resize(nnz);
new_row->Resize(nnz);
const auto* old_col_data = old_col.template data<TIndex>();
const auto* old_col_data = old_col.template data<int64_t>();
const auto* old_row_data = old_row.template data<int>();
auto* new_col_data = new_col->template mutable_data<TIndex>();
auto* new_col_data = new_col->template mutable_data<int64_t>();
auto* new_row_data = new_row->template mutable_data<int>();
for (int i = 0; i < nnz; ++i) {
TIndex offset = old_row_data[i] * old_stride_ + old_col_data[i];
int64_t offset = old_row_data[i] * old_stride_ + old_col_data[i];
new_row_data[i] = offset / new_stride_;
new_col_data[i] = offset % new_stride_;
}
@ -122,8 +122,8 @@ class SparseMatrixReshapeOp : public Operator<Context> {
}
private:
TIndex old_stride_;
TIndex new_stride_;
int64_t old_stride_;
int64_t new_stride_;
};
} // namespace caffe2

View File

@ -29,9 +29,9 @@ class TTContractionOp final : public Operator<Context> {
USE_OPERATOR_CONTEXT_FUNCTIONS;
TTContractionOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
K_(OperatorBase::GetSingleArgument<TIndex>("K", 0)),
M_(OperatorBase::GetSingleArgument<TIndex>("M", 0)),
N_(OperatorBase::GetSingleArgument<TIndex>("N", 0)) {
K_(OperatorBase::GetSingleArgument<int64_t>("K", 0)),
M_(OperatorBase::GetSingleArgument<int64_t>("M", 0)),
N_(OperatorBase::GetSingleArgument<int64_t>("N", 0)) {
CAFFE_ENFORCE(OperatorBase::HasArgument("K"), "Argument `K` is missing.");
CAFFE_ENFORCE(OperatorBase::HasArgument("M"), "Argument `M` is missing.");
CAFFE_ENFORCE(OperatorBase::HasArgument("N"), "Argument `N` is missing.");
@ -44,8 +44,8 @@ class TTContractionOp final : public Operator<Context> {
CAFFE_ENFORCE(A.ndim() == 2, A.ndim());
TIndex A_size = A.size_from_dim(0);
TIndex B_size = B.size_from_dim(0);
int64_t A_size = A.size_from_dim(0);
int64_t B_size = B.size_from_dim(0);
CAFFE_ENFORCE(
K_ * M_ == A_size,
@ -55,19 +55,19 @@ class TTContractionOp final : public Operator<Context> {
B_size % (K_ * N_) == 0,
"Argument `K` and `N` do not agree with the size of B.");
TIndex D_ = B_size / (K_ * N_);
int64_t D_ = B_size / (K_ * N_);
TIndex C_size = D_ * M_ * N_;
C->Resize(vector<TIndex>{C_size});
int64_t C_size = D_ * M_ * N_;
C->Resize(vector<int64_t>{C_size});
TIndex B_stride = K_ * N_;
TIndex C_stride = M_ * N_;
int64_t B_stride = K_ * N_;
int64_t C_stride = M_ * N_;
const T* A_data = A.template data<T>();
const T* B_data = B.template data<T>();
T* C_data = C->template mutable_data<T>();
for (TIndex B_index = 0; B_index < B_size; B_index += B_stride) {
for (int64_t B_index = 0; B_index < B_size; B_index += B_stride) {
math::Gemm<T, Context, Engine>(
CblasTrans,
CblasNoTrans,
@ -84,9 +84,9 @@ class TTContractionOp final : public Operator<Context> {
}
protected:
TIndex K_;
TIndex M_;
TIndex N_;
int64_t K_;
int64_t M_;
int64_t N_;
};
template <typename T, class Context, class Engine = DefaultEngine>
@ -95,9 +95,9 @@ class TTContractionGradientOp final : public Operator<Context> {
USE_OPERATOR_CONTEXT_FUNCTIONS;
TTContractionGradientOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
K_(OperatorBase::GetSingleArgument<TIndex>("K", 0)),
M_(OperatorBase::GetSingleArgument<TIndex>("M", 0)),
N_(OperatorBase::GetSingleArgument<TIndex>("N", 0)) {}
K_(OperatorBase::GetSingleArgument<int64_t>("K", 0)),
M_(OperatorBase::GetSingleArgument<int64_t>("M", 0)),
N_(OperatorBase::GetSingleArgument<int64_t>("N", 0)) {}
bool RunOnDevice() override {
const auto& G = Input(0);
@ -106,16 +106,16 @@ class TTContractionGradientOp final : public Operator<Context> {
auto* dA = Output(0);
auto* dB = Output(1);
TIndex G_size = G.size_from_dim(0);
TIndex D_ = G_size / (M_ * N_);
int64_t G_size = G.size_from_dim(0);
int64_t D_ = G_size / (M_ * N_);
TIndex dB_size = D_ * K_ * N_;
int64_t dB_size = D_ * K_ * N_;
dA->Resize(A.dims());
dB->Resize(B.dims());
TIndex B_stride = K_ * N_;
TIndex G_stride = M_ * N_;
int64_t B_stride = K_ * N_;
int64_t G_stride = M_ * N_;
const T* G_data = G.template data<T>();
const T* A_data = A.template data<T>();
@ -125,7 +125,7 @@ class TTContractionGradientOp final : public Operator<Context> {
T* dB_data = dB->template mutable_data<T>();
const T* G_ptr = G_data;
for (TIndex B_index = 0; B_index < dB_size; B_index += B_stride) {
for (int64_t B_index = 0; B_index < dB_size; B_index += B_stride) {
math::Gemm<T, Context, Engine>(
CblasNoTrans,
CblasTrans,
@ -139,7 +139,7 @@ class TTContractionGradientOp final : public Operator<Context> {
}
G_ptr = G_data;
for (TIndex B_index = 0; B_index < dB_size; B_index += B_stride) {
for (int64_t B_index = 0; B_index < dB_size; B_index += B_stride) {
math::Gemm<T, Context, Engine>(
CblasNoTrans,
CblasNoTrans,
@ -156,9 +156,9 @@ class TTContractionGradientOp final : public Operator<Context> {
}
protected:
TIndex K_;
TIndex M_;
TIndex N_;
int64_t K_;
int64_t M_;
int64_t N_;
};
} // namespace caffe2

View File

@ -29,7 +29,7 @@ class TTPadOp final : public Operator<Context> {
USE_OPERATOR_CONTEXT_FUNCTIONS;
TTPadOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
scale_(OperatorBase::GetSingleArgument<TIndex>("scale", 0)) {
scale_(OperatorBase::GetSingleArgument<int64_t>("scale", 0)) {
CAFFE_ENFORCE(
OperatorBase::HasArgument("scale"), "Argument `scale` is missing.");
}
@ -46,16 +46,16 @@ class TTPadOp final : public Operator<Context> {
auto* X_orig_dim0 = Output(1);
X_orig_dim0->Resize(1);
*X_orig_dim0->template mutable_data<TIndex>() = X_dim0;
*X_orig_dim0->template mutable_data<int64_t>() = X_dim0;
if (X_dim0 % scale_ != 0) {
TIndex padded_dim0 = (X_dim0 / scale_ + 1) * scale_;
int64_t padded_dim0 = (X_dim0 / scale_ + 1) * scale_;
auto dim0_diff = padded_dim0 - X_dim0;
// set growthPct to the upper bound percentage: (100 * scale_ / X_dim0)
X_pad->Extend(dim0_diff, 100 * scale_ / X_dim0, &context_);
auto* X_pad_data = X_pad->template mutable_data<T>();
TIndex X_size = X_dim0 * X_dim1;
int64_t X_size = X_dim0 * X_dim1;
memset(X_pad_data + X_size, 0, dim0_diff * X_dim1 * sizeof(T));
}
@ -63,7 +63,7 @@ class TTPadOp final : public Operator<Context> {
}
protected:
TIndex scale_;
int64_t scale_;
};
template <typename T, class Context, class Engine = DefaultEngine>
@ -78,7 +78,7 @@ class TTPadGradientOp final : public Operator<Context> {
auto* output = Output(0);
CAFFE_ENFORCE(&G == output);
auto old_dim0 = *Input(1).template data<TIndex>();
auto old_dim0 = *Input(1).template data<int64_t>();
auto new_dim0 = G.dim(0);
auto dim1 = G.dim(1);

View File

@ -43,7 +43,7 @@ class IDEEPConcatOp final : public IDEEPOperator {
}
auto axis_vdata = ideep::concat::compute(inputs, axis_, add_axis_, *output);
axis_info->Resize(vector<TIndex>(1, InputSize()));
axis_info->Resize(vector<int64_t>(1, InputSize()));
int* axis_data = axis_info->template mutable_data<int>();
for (int i = 0; i < axis_vdata.size(); i++) {
axis_data[i] = axis_vdata[i];

View File

@ -39,7 +39,7 @@ class IDEEPConvPoolOpBase : public ConvPoolOpBase<IDEEPContext> {
ideep::tensor::dims output_dims;
auto input_dims = input.get_dims();
vector<TIndex> input_Tdims (input_dims.begin(), input_dims.end());
vector<int64_t> input_Tdims (input_dims.begin(), input_dims.end());
InferOutputSize(
input_Tdims,
output_channel,

View File

@ -35,7 +35,7 @@ class IDEEPSqueezeOp final : public IDEEPOperator {
(dims_.back() + 1),
" dimensions.");
const auto& ideep_dims = X.get_dims();
vector<TIndex> dims(ideep_dims.begin(), ideep_dims.end());
vector<int64_t> dims(ideep_dims.begin(), ideep_dims.end());
const auto& new_dims = SqueezeOp<IDEEPContext>::ComputeDims(dims, dims_);
itensor::dims new_dims_ideep(new_dims.begin(), new_dims.end());
if (&X != Y) {

View File

@ -372,14 +372,14 @@ ImageInputOp<Context>::ImageInputOp(
randgen_per_thread_.emplace_back(meta_randgen());
}
prefetched_image_.Resize(
TIndex(batch_size_),
TIndex(crop_),
TIndex(crop_),
TIndex(color_ ? 3 : 1));
int64_t(batch_size_),
int64_t(crop_),
int64_t(crop_),
int64_t(color_ ? 3 : 1));
if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
prefetched_label_.Resize(TIndex(batch_size_), TIndex(num_labels_));
prefetched_label_.Resize(int64_t(batch_size_), int64_t(num_labels_));
} else {
prefetched_label_.Resize(vector<TIndex>(1, batch_size_));
prefetched_label_.Resize(vector<int64_t>(1, batch_size_));
}
for (int i = 0; i < additional_output_sizes.size(); ++i) {
@ -387,7 +387,7 @@ ImageInputOp<Context>::ImageInputOp(
Context::GetDeviceType());
prefetched_additional_outputs_.emplace_back(CPU);
prefetched_additional_outputs_[i].Resize(
TIndex(batch_size_), TIndex(additional_output_sizes[i]));
int64_t(batch_size_), int64_t(additional_output_sizes[i]));
}
}

View File

@ -23,10 +23,10 @@ TEST(MKLDNNTest, SimpleConvolutionTest) {
int pads[2] = {0, 0};
// Creating Input and output tensors
Tensor X(vector<TIndex>{16, 8, 32, 32}, CPU);
Tensor W(vector<TIndex>{64, 8, 3, 3}, CPU);
Tensor b(vector<TIndex>{64}, CPU);
Tensor Y(vector<TIndex>{16, 64, 30, 30}, CPU);
Tensor X(vector<int64_t>{16, 8, 32, 32}, CPU);
Tensor W(vector<int64_t>{64, 8, 3, 3}, CPU);
Tensor b(vector<int64_t>{64}, CPU);
Tensor Y(vector<int64_t>{16, 64, 30, 30}, CPU);
float* data = X.mutable_data<float>();
for (int i = 0; i < X.size(); ++i) {
@ -91,7 +91,7 @@ TEST(MKLDNNTest, MKLMemoryCopyTest) {
// the buffer size being empty for both - former in dnnAllocateBuffer and
// the latter in dnnConversionExecute (likely due to some difference in
// layout?). Test both cases.
vector<vector<TIndex>> dims_list{{10, 3, 20, 20}, {0}, {0, 10}};
vector<vector<int64_t>> dims_list{{10, 3, 20, 20}, {0}, {0, 10}};
for (const auto& dims : dims_list) {
auto X_cpu_in = caffe2::make_unique<Tensor>(dims, CPU);
CPUContext ctx;

View File

@ -84,8 +84,8 @@ class MKLMemoryDeserializer : public BlobDeserializerBase {
"MKLMemory only supports either float or double formats.");
CAFFE_ENFORCE(
!proto.has_segment(), "MKLMemory does not support segment right now.");
vector<TIndex> dims;
for (const TIndex d : proto.dims()) {
vector<int64_t> dims;
for (const int64_t d : proto.dims()) {
dims.push_back(d);
}
// TODO: right now, every time we do a deserializer we create a new MKL

View File

@ -96,7 +96,7 @@ class MKLConcatOp final : public MKLOperator<T> {
private:
int axis_;
vector<TIndex> cached_output_dims_;
vector<int64_t> cached_output_dims_;
};
} // namespace mkl

View File

@ -37,7 +37,7 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
math::Set<T, CPUContext>(
M, 0.0, cpu_zero_bias.template mutable_data<float>(), &ctx);
zero_bias_.reset(new MKLMemory<T>(std::vector<TIndex>{M}));
zero_bias_.reset(new MKLMemory<T>(std::vector<int64_t>{M}));
zero_bias_->CopyFrom(cpu_zero_bias);
}
const auto& bias = InputSize() == 2
@ -130,11 +130,11 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
if (group_ > 1) {
// Explicitly reformat the buffer.
MKLMemory<float> group_filter(
std::vector<TIndex>{TIndex(group_),
TIndex(filter.dim32(0) / group_),
TIndex(filter.dim32(1)),
TIndex(filter.dim32(2)),
TIndex(filter.dim32(3))},
std::vector<int64_t>{int64_t(group_),
int64_t(filter.dim32(0) / group_),
int64_t(filter.dim32(1)),
int64_t(filter.dim32(2)),
int64_t(filter.dim32(3))},
nullptr,
dnnResourceFilter,
/*share_memory_if_possible=*/true);
@ -168,8 +168,8 @@ class MKLConvOp final : public ConvPoolOpBase<MKLContext> {
// Input: X, W, b
// Output: Y
std::unique_ptr<MKLMemory<T>> zero_bias_;
vector<TIndex> cached_input_dims_;
vector<TIndex> cached_filter_dims_;
vector<int64_t> cached_input_dims_;
vector<int64_t> cached_filter_dims_;
PrimitiveWrapper<T> primitive_;
LayoutWrapper<T> input_layout_;
LayoutWrapper<T> filter_layout_;

View File

@ -106,8 +106,8 @@ class ConvMKLDNNOp final : public ConvPoolOpBase<CPUContext> {
private:
// Input: X, W, b
// Output: Y
vector<TIndex> cached_input_dims_;
vector<TIndex> cached_filter_dims_;
vector<int64_t> cached_input_dims_;
vector<int64_t> cached_filter_dims_;
PrimitiveWrapper<T> primitive_;
unique_ptr<MKLMemory<T>> X_wrapper_ = nullptr;
unique_ptr<MKLMemory<T>> filter_wrapper_ = nullptr;

View File

@ -64,7 +64,7 @@ class MKLSumOp final : public MKLOperator<T> {
private:
std::vector<float> coefficients_;
vector<TIndex> cached_input_dims_;
vector<int64_t> cached_input_dims_;
vector<std::shared_ptr<void>> input_views_;
};

View File

@ -90,8 +90,8 @@ class MKLFullyConnectedOp final : public MKLOperator<T> {
// Input: X, W, b
// Output: Y
size_t axis_{1};
vector<TIndex> cached_input_dims_;
vector<TIndex> cached_filter_dims_;
vector<int64_t> cached_input_dims_;
vector<int64_t> cached_filter_dims_;
PrimitiveWrapper<T> primitive_;
LayoutWrapper<T> input_layout_;
LayoutWrapper<T> filter_layout_;

View File

@ -19,7 +19,7 @@ class MKLLRNOp final : public LRNOpBase<T, MKLContext> {
bool RunOnDeviceWithOrderNHWC() override;
private:
vector<TIndex> cached_input_dims_;
vector<int64_t> cached_input_dims_;
LayoutWrapper<T> workspace_layout_;
std::unique_ptr<MKLWorkspace<T>> workspace_buffer_;
PrimitiveWrapper<T> primitive_;

View File

@ -141,7 +141,7 @@ class PackedFCOp final : public Operator<CPUContext> {
}
size_t axis_{1};
uint32_t hash_{0};
vector<TIndex> Y_shape_cache_;
vector<int64_t> Y_shape_cache_;
Tensor bias_multiplier_{CPU};
std::unique_ptr<MKLPackedMatrix> local_packed_matrix_;
};

View File

@ -41,8 +41,8 @@ class MKLPoolOp final : public ConvPoolOpBase<MKLContext> {
// Input: X
// Output: Y
private:
vector<TIndex> cached_input_dims_;
// vector<TIndex> cached_avgpool_input_dims_;
vector<int64_t> cached_input_dims_;
// vector<int64_t> cached_avgpool_input_dims_;
LayoutWrapper<T> workspace_layout_;
std::unique_ptr<MKLWorkspace<T>> workspace_buffer_;
PrimitiveWrapper<T> primitive_;

View File

@ -43,7 +43,7 @@ class MKLReluOp : public MKLOperator<T> {
}
private:
vector<TIndex> cached_input_dims_;
vector<int64_t> cached_input_dims_;
};
template <typename T>

View File

@ -146,7 +146,7 @@ class MKLBNOp final : public Operator<MKLContext> {
const StorageOrder order_;
const int num_batches_;
vector<TIndex> cached_input_dims_;
vector<int64_t> cached_input_dims_;
LayoutWrapper<T> scale_bias_layout_;
LayoutWrapper<T> saved_mean_layout_;
LayoutWrapper<T> saved_var_layout_;

View File

@ -57,7 +57,7 @@ class MKLSqueezeOp final : public MKLOperator<T> {
private:
vector<int> dims_;
vector<TIndex> cached_input_dims_;
vector<int64_t> cached_input_dims_;
};
} // namespace mkl

View File

@ -19,7 +19,7 @@ CAFFE_KNOWN_TYPE(mkl::MKLMemory<float>);
CAFFE_KNOWN_TYPE(mkl::MKLMemory<double>);
template <typename T>
static vector<TIndex> GetMKLTensorInfo(
static vector<int64_t> GetMKLTensorInfo(
const void* c,
size_t* capacity,
DeviceOption* device) {

View File

@ -5,8 +5,8 @@
#include <vector>
#include <mutex>
#include "caffe2/core/flags.h" // for TIndex
#include "caffe2/core/tensor.h" // for TIndex
#include "caffe2/core/flags.h" // for int64_t
#include "caffe2/core/tensor.h" // for int64_t
#include "caffe2/mkl/utils/mkl_dnn_cppwrapper.h"
// A global boolean variable that controls the behavior when we call View() on
@ -270,7 +270,7 @@ class MKLMemory {
"Reshape is not allowed for custom layouts. "
"Convert to plain layout before invoking Reshape().");
TIndex new_size = 1;
int64_t new_size = 1;
for (auto i = 0; i < dims.size(); ++i) {
CAFFE_ENFORCE_GE_WITH_CALLER(dims[i], 0);
new_size *= dims[i];
@ -279,7 +279,7 @@ class MKLMemory {
new_size == size_,
"New size and old size are not equal. Reshape is not possible.");
vector<TIndex> new_dims(dims.size());
vector<int64_t> new_dims(dims.size());
vector<size_t> size(dims.size());
vector<size_t> strides(dims.size());
for (int i = 0; i < dims.size(); ++i) {
@ -456,7 +456,7 @@ class MKLMemory {
return buffer_.get();
}
inline const vector<TIndex>& dims() const {
inline const vector<int64_t>& dims() const {
return dims_;
}
@ -470,7 +470,7 @@ class MKLMemory {
/**
* Returns the size (i.e., the number of items) in the buffer.
*/
inline TIndex size() const {
inline int64_t size() const {
return size_;
}
@ -479,7 +479,7 @@ class MKLMemory {
* must be between 0 (inclusive) and the number of dimensions, otherwise
* this function will produce a fatal message.
*/
inline TIndex dim(const int i) const {
inline int64_t dim(const int i) const {
return dims_.at(i);
}
@ -545,9 +545,9 @@ class MKLMemory {
mutable std::mutex buffer_lock_;
// The dimensions in the same order as Caffe2 does. This is used to
// interface with C2.
vector<TIndex> dims_;
vector<int64_t> dims_;
// Number of items in the buffer.
TIndex size_ = -1;
int64_t size_ = -1;
// The user dnn layout.
LayoutWrapper<T> user_layout_;
// The internal dnn layout.

View File

@ -97,7 +97,7 @@ class MKLOperator : public OperatorBase {
// The primitive used in the operator.
PrimitiveWrapper<T> primitive_;
// Size cache for all the input sizes.
vector<vector<TIndex>> input_size_cache_;
vector<vector<int64_t>> input_size_cache_;
// An internal MKLMemory buffer. This is usually handy when we have a
// single output from the operator. If your operator has multiple outputs
// then you should allocate your own buffer.

View File

@ -249,7 +249,7 @@ public:
const int32_t ndim() const { return dims_.size(); }
vector<TIndex> dims() const { return dims_; }
vector<int64_t> dims() const { return dims_; }
const int32_t dim32(const int index) const { return dims_.at(index); }
@ -283,7 +283,7 @@ private:
bool SetDims(const vector<TI> &src) {
auto old_size = size_;
dims_.resize(src.size());
TIndex new_size = 1;
int64_t new_size = 1;
for (unsigned int i = 0; i < src.size(); ++i) {
new_size *= src[i];
dims_[i] = src[i];
@ -299,7 +299,7 @@ private:
return size_ > old_size;
}
bool SetDims(const TIndex d0) {
bool SetDims(const int64_t d0) {
auto old_size = size_;
dims_.resize(1);
dims_[0] = d0;
@ -307,7 +307,7 @@ private:
return size_ > old_size;
}
bool SetDims(const TIndex d0, const TIndex d1) {
bool SetDims(const int64_t d0, const int64_t d1) {
auto old_size = size_;
dims_.resize(2);
dims_[0] = d0;
@ -316,7 +316,7 @@ private:
return size_ > old_size;
}
bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2) {
bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) {
auto old_size = size_;
dims_.resize(3);
dims_[0] = d0;
@ -326,8 +326,8 @@ private:
return size_ > old_size;
}
bool SetDims(const TIndex d0, const TIndex d1, const TIndex d2,
const TIndex d3) {
bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2,
const int64_t d3) {
auto old_size = size_;
dims_.resize(4);
dims_[0] = d0;
@ -338,8 +338,8 @@ private:
return size_ > old_size;
}
vector<TIndex> dims_;
TIndex size_ = -1;
vector<int64_t> dims_;
int64_t size_ = -1;
arm_compute::TensorShape shape_;
unique_ptr<arm_compute::GCTensor> tensor_;
};

View File

@ -40,7 +40,7 @@ bool GLFullyConnectedOp<T>::RunOnDevice() {
CAFFE_ENFORCE_EQ(1, B_->ndim());
CAFFE_ENFORCE_EQ(N, B_->dim32(0));
vector<TIndex> output_dims = {M, N};
vector<int64_t> output_dims = {M, N};
GLTensor<T> *Y =
OperatorBase::Outputs()[0]->template GetMutable<GLTensor<T>>();
if (first_run_) {

View File

@ -53,7 +53,7 @@ bool GLAveragePoolOp<DataType>::RunOnDeviceWithOrderNCHW() {
int height = X_->dim32(2);
int width = X_->dim32(3);
vector<TIndex> output_dims = {N, channels, 1, 1};
vector<int64_t> output_dims = {N, channels, 1, 1};
if (!global_pooling_) {
output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1;
output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1;
@ -116,7 +116,7 @@ template <> bool GLMaxPoolOp<DataType>::RunOnDeviceWithOrderNCHW() {
int height = X_->dim32(2);
int width = X_->dim32(3);
vector<TIndex> output_dims = {N, channels, 1, 1};
vector<int64_t> output_dims = {N, channels, 1, 1};
if (!global_pooling_) {
output_dims[2] = (height + pad_t() + pad_b() - kernel_h()) / stride_h() + 1;
output_dims[3] = (width + pad_l() + pad_r() - kernel_w()) / stride_w() + 1;

View File

@ -45,7 +45,7 @@ bool GLResizeNearestOp<T>::RunOnDevice() {
GLTensor<T> *Y =
OperatorBase::Outputs()[0]->template GetMutable<GLTensor<T>>();
vector<TIndex> output_dims = {N, C, H * height_scale_, W * width_scale_};
vector<int64_t> output_dims = {N, C, H * height_scale_, W * width_scale_};
if (first_run_) {
Y->Resize(output_dims);

View File

@ -329,7 +329,7 @@ class CopyToMPSCNNOp final : public Operator<CPUContext> {
for (auto i = 0; i < Inputs().size(); ++i) {
const auto& X = Input(i);
CAFFE_ENFORCE(X.ndim() > 0 && X.ndim() <= 4);
std::vector<TIndex> XDims = {1, 1, 1, 1};
std::vector<int64_t> XDims = {1, 1, 1, 1};
XDims.assign(X.dims().begin(), X.dims().end());
caffe2::Timer t;
@ -2259,15 +2259,15 @@ class MPSCNNGenerateProposalsCPPOp final : public Operator<CPUContext> {
// bbox_deltas: (num_images, A * 4, H, W)
CAFFE_ENFORCE_EQ(
bbox_deltas.dims(), (vector<TIndex>{num_images, 4 * A, height, width}));
bbox_deltas.dims(), (vector<int64_t>{num_images, 4 * A, height, width}));
// im_info_tensor: (num_images, 3), format [height, width, scale; ...]
CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector<TIndex>{num_images, 3}));
CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector<int64_t>{num_images, 3}));
CAFFE_ENFORCE(
im_info_tensor.template IsType<float>(), im_info_tensor.meta().name());
// anchors: (A, 4)
CAFFE_ENFORCE_EQ(anchors.dims(), (vector<TIndex>{A, 4}));
CAFFE_ENFORCE_EQ(anchors.dims(), (vector<int64_t>{A, 4}));
CAFFE_ENFORCE(anchors.template IsType<float>(), anchors.meta().name());
// Broadcast the anchors to all pixels
auto all_anchors_vec =

View File

@ -640,7 +640,7 @@ void testMPSCNN() {
CAFFE_ENFORCE_EQ(t1.ndim(), 2);
CAFFE_ENFORCE(t2.dim32(2) == 1 && t2.dim32(3) == 1);
const_cast<TensorCPU&>(t2).Reshape(
std::vector<TIndex>{TIndex(batchSize), TIndex(COut)});
std::vector<int64_t>{int64_t(batchSize), int64_t(COut)});
// Note dims do not match, as Metal leaves a 1x1 spatial
// dimension.
CAFFE_ENFORCE_EQ(t1.dims(), t2.dims());

View File

@ -12,7 +12,7 @@ namespace caffe2 {
namespace {
void AddNoiseInput(const vector<TIndex>& shape, const string& name, Workspace* ws) {
void AddNoiseInput(const vector<int64_t>& shape, const string& name, Workspace* ws) {
DeviceOption option;
CPUContext context(option);
Blob* blob = ws->CreateBlob(name);
@ -58,7 +58,7 @@ void compareMaxPooling(int N,
def1.add_arg()->CopyFrom(MakeArgument("pad_b", padB));
def1.add_arg()->CopyFrom(MakeArgument("pad_r", padR));
AddNoiseInput(vector<TIndex>{N, C, H, W}, "X", &ws);
AddNoiseInput(vector<int64_t>{N, C, H, W}, "X", &ws);
unique_ptr<OperatorBase> op1(CreateOperator(def1, &ws));
EXPECT_NE(nullptr, op1.get());

View File

@ -12,7 +12,7 @@ namespace caffe2 {
namespace {
void AddNoiseInput(const vector<TIndex>& shape, const string& name, Workspace* ws) {
void AddNoiseInput(const vector<int64_t>& shape, const string& name, Workspace* ws) {
DeviceOption option;
CPUContext context(option);
Blob* blob = ws->CreateBlob(name);
@ -44,7 +44,7 @@ void compareResizeNeareast(int N,
def1.add_arg()->CopyFrom(MakeArgument("width_scale", wscale));
def1.add_arg()->CopyFrom(MakeArgument("height_scale", hscale));
AddNoiseInput(vector<TIndex>{N, C, H, W}, "X", &ws);
AddNoiseInput(vector<int64_t>{N, C, H, W}, "X", &ws);
unique_ptr<OperatorBase> op1(CreateOperator(def1, &ws));
EXPECT_NE(nullptr, op1.get());

View File

@ -12,7 +12,7 @@
#include <vector>
void AddNoiseInput(const std::vector<caffe2::TIndex>& shape,
void AddNoiseInput(const std::vector<int64_t>& shape,
const std::string& name,
caffe2::Workspace* ws) {
caffe2::CPUContext context;
@ -60,13 +60,13 @@ double BenchOp(const std::string& typ,
def1.add_arg()->CopyFrom(caffe2::MakeArgument("pad_r", 0));
def1.add_arg()->CopyFrom(caffe2::MakeArgument("convolution_transform_strategy", std::string("PRECOMPUTE")));
AddNoiseInput(std::vector<caffe2::TIndex>{1, inputC, inH, inW}, "X", ws);
AddNoiseInput(std::vector<int64_t>{1, inputC, inH, inW}, "X", ws);
if (transposed) {
AddNoiseInput(std::vector<caffe2::TIndex>{inputC, outputC, kH, kW}, "W", ws);
AddNoiseInput(std::vector<int64_t>{inputC, outputC, kH, kW}, "W", ws);
} else {
AddNoiseInput(std::vector<caffe2::TIndex>{outputC, inputC, kH, kW}, "W", ws);
AddNoiseInput(std::vector<int64_t>{outputC, inputC, kH, kW}, "W", ws);
}
AddNoiseInput(std::vector<caffe2::TIndex>{outputC}, "B", ws);
AddNoiseInput(std::vector<int64_t>{outputC}, "B", ws);
std::unique_ptr<caffe2::OperatorBase> op1(CreateOperator(def1, ws));
@ -131,19 +131,19 @@ static double BenchGLConvolution(int input_channels,
}
AddNoiseInput(
std::vector<caffe2::TIndex>{1, input_channels, input_height, input_width}, "X_cpu", ws);
std::vector<int64_t>{1, input_channels, input_height, input_width}, "X_cpu", ws);
if (transposed) {
AddNoiseInput(
std::vector<caffe2::TIndex>{input_channels, output_channels, kernel_height, kernel_width},
std::vector<int64_t>{input_channels, output_channels, kernel_height, kernel_width},
"W",
ws);
} else {
AddNoiseInput(
std::vector<caffe2::TIndex>{output_channels, input_channels, kernel_height, kernel_width},
std::vector<int64_t>{output_channels, input_channels, kernel_height, kernel_width},
"W",
ws);
}
AddNoiseInput(std::vector<caffe2::TIndex>{output_channels}, "b", ws);
AddNoiseInput(std::vector<int64_t>{output_channels}, "b", ws);
caffe2::NetDef netdef;
{

View File

@ -36,7 +36,7 @@
namespace caffe2 {
void AddConstInput(const vector<TIndex>& shape,
void AddConstInput(const vector<int64_t>& shape,
const float value,
const string& name,
Workspace* ws) {
@ -50,7 +50,7 @@ void AddConstInput(const vector<TIndex>& shape,
&context);
}
void AddNoiseInput(const vector<TIndex>& shape,
void AddNoiseInput(const vector<int64_t>& shape,
const string& name,
Workspace* ws) {
DeviceOption option;
@ -72,7 +72,7 @@ float snpe_run(int iters, Workspace& ws) {
const int W = 227;
const int C = 3;
POPULATE_DATA("X_snpe", (caffe2::vector<caffe2::TIndex>{H, W, C}), hwc);
POPULATE_DATA("X_snpe", (caffe2::vector<int64_t>{H, W, C}), hwc);
OperatorDef def;
def.set_name("snpe_test");
@ -108,7 +108,7 @@ float caffe2_run(int iters, Workspace& ws) {
ReadProtoFromBinaryFile("/data/local/tmp/squeeze_init_net.pb", &init_net);
ReadProtoFromBinaryFile("/data/local/tmp/squeeze_predict_net.pb", &predict_net);
ws.RunNetOnce(init_net);
POPULATE_DATA("data", (caffe2::vector<caffe2::TIndex>{N, C, H, W}), chw);
POPULATE_DATA("data", (caffe2::vector<int64_t>{N, C, H, W}), chw);
predict_net.set_name("SqueezeNet");
ws.CreateNet(predict_net);

View File

@ -538,7 +538,7 @@ void run2b1bConvIm2ColGEMM(QConvState* state,
CAFFE_ENFORCE_EQ(Y->dim32(0), divRoundUp(X.dim32(0) * OH * OW, kGEMMTileSize) * kGEMMTileSize);
CAFFE_ENFORCE_EQ(Y->dim32(1), OC);
Y->ShrinkTo(X.dim32(0) * OH * OW);
Y->Reshape(std::vector<TIndex>{{TIndex(X.dim(0)), TIndex(OH), TIndex(OW), TIndex(OC)}});
Y->Reshape(std::vector<int64_t>{{int64_t(X.dim(0)), int64_t(OH), int64_t(OW), int64_t(OC)}});
}
}

View File

@ -62,7 +62,7 @@ int randInt(int a, int b) {
return std::uniform_int_distribution<int>(a, b)(gen);
}
TensorCPU genTensor11(std::vector<TIndex> shape) {
TensorCPU genTensor11(std::vector<int64_t> shape) {
Tensor r(CPU);
r.Resize(shape);
@ -76,7 +76,7 @@ TensorCPU genTensor11(std::vector<TIndex> shape) {
return r;
}
TensorCPU genTensorUniform11(std::vector<TIndex> shape) {
TensorCPU genTensorUniform11(std::vector<int64_t> shape) {
Tensor r(CPU);
r.Resize(shape);
@ -90,7 +90,7 @@ TensorCPU genTensorUniform11(std::vector<TIndex> shape) {
return r;
}
TensorCPU genTensor0123(std::vector<TIndex> shape) {
TensorCPU genTensor0123(std::vector<int64_t> shape) {
Tensor r(CPU);
r.Resize(shape);
@ -171,7 +171,7 @@ inline void qgemmNT(int M, int N, int K, const uint8_t* A, const uint8_t* B, flo
}
}
void gemmTest(TIndex M, TIndex N, TIndex K) {
void gemmTest(int64_t M, int64_t N, int64_t K) {
auto X = genTensor11({M, K});
auto W = genTensor11({N, K});
Tensor XQ(CPU), WQ(CPU), YQ(CPU), Y(CPU);

View File

@ -98,7 +98,7 @@ class MPIAllgatherOp final : public Operator<Context> {
MPI_Comm comm = OperatorBase::Input<MPICommonWorldWrapper>(0).comm();
auto& input = Input(1);
auto* output = Output(0);
vector<TIndex> output_dims = input.dims();
vector<int64_t> output_dims = input.dims();
output_dims[0] *= OperatorBase::Input<MPICommonWorldWrapper>(0).size();
output->Resize(output_dims);
MPI_CHECK(MPI_Allgather(

View File

@ -12,7 +12,7 @@ bool AccuracyOp<float, CPUContext>::RunOnDevice() {
int D = X.dim32(1);
CAFFE_ENFORCE_EQ(label.ndim(), 1);
CAFFE_ENFORCE_EQ(label.dim32(0), N);
Y->Resize(vector<TIndex>());
Y->Resize(vector<int64_t>());
const auto* Xdata = X.data<float>();
const auto* labelData = label.data<int>();
const int top_k = top_k_;
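A minimal sketch, for illustration only (not part of this diff), of what the empty int64_t dims vector above produces: resizing with no dimensions gives a zero-dimensional scalar tensor with a single element, which is why the operator can write one value into it directly. It assumes the caffe2 Tensor/CPU headers and mirrors calls already shown in these hunks; ScalarOutputSketch is a made-up name.

#include <cstdint>
#include <vector>
#include "caffe2/core/tensor.h"

namespace caffe2 {
// Illustrative only: Resize with an empty int64_t dims vector yields a
// zero-dimensional (scalar) tensor holding exactly one element.
inline void ScalarOutputSketch() {
  Tensor y(CPU);
  y.Resize(std::vector<int64_t>());       // no dims -> scalar
  float* data = y.mutable_data<float>();  // storage for the single element
  data[0] = 0.0f;
}
} // namespace caffe2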

View File

@ -53,7 +53,7 @@ bool AccuracyOp<float, CUDAContext>::RunOnDevice() {
int D = X.dim32(1);
CAFFE_ENFORCE_EQ(label.ndim(), 1);
CAFFE_ENFORCE_EQ(label.dim32(0), N);
Y->Resize(vector<TIndex>());
Y->Resize(vector<int64_t>());
float* Ydata = Y->template mutable_data<float>();
math::Set<float, CUDAContext>(1, 0, Ydata, &context_);
AccuracyKernel<<<

View File

@ -15,14 +15,14 @@ void ComputeArgImpl(
const int n,
const Compare& comp,
const T* X,
TIndex* Y,
int64_t* Y,
Context* context) {
math::Set<TIndex, Context>(prev_size * next_size, TIndex(0), Y, context);
math::Set<int64_t, Context>(prev_size * next_size, int64_t(0), Y, context);
for (int i = 0; i < prev_size; ++i) {
const T* cur_X = X + i * n * next_size + next_size;
for (int k = 1; k < n; ++k) {
for (int j = 0; j < next_size; ++j) {
TIndex* cur_Y = Y + i * next_size + j;
int64_t* cur_Y = Y + i * next_size + j;
if (comp(*cur_X, X[i * n * next_size + *cur_Y * next_size + j])) {
*cur_Y = k;
}
@ -41,7 +41,7 @@ bool ArgMaxReducer<CPUContext>::operator()(
const int next_size,
const int n,
const T* X,
TIndex* Y,
int64_t* Y,
CPUContext* context) const {
ComputeArgImpl(prev_size, next_size, n, std::greater<T>(), X, Y, context);
return true;
@ -54,7 +54,7 @@ bool ArgMinReducer<CPUContext>::operator()(
const int next_size,
const int n,
const T* X,
TIndex* Y,
int64_t* Y,
CPUContext* context) const {
ComputeArgImpl(prev_size, next_size, n, std::less<T>(), X, Y, context);
return true;
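A standalone sketch (plain C++, independent of caffe2) of the reduction these two reducers drive: the input is viewed as prev_size x n x next_size, the comparison runs over the middle axis, and the winning position is written out as int64_t, matching the new output type above. ComputeArgSketch is a made-up name.

#include <algorithm>
#include <cstdint>

// Arg-reduction over the middle axis of a prev_size x n x next_size view.
// comp(a, b) == true means "a beats the current best b".
template <typename T, typename Compare>
void ComputeArgSketch(int prev_size, int next_size, int n,
                      const Compare& comp, const T* X, int64_t* Y) {
  std::fill(Y, Y + int64_t(prev_size) * next_size, int64_t(0));
  for (int i = 0; i < prev_size; ++i) {
    for (int k = 1; k < n; ++k) {
      for (int j = 0; j < next_size; ++j) {
        int64_t* best = Y + int64_t(i) * next_size + j;
        const T cur = X[(int64_t(i) * n + k) * next_size + j];
        const T best_val = X[(int64_t(i) * n + *best) * next_size + j];
        if (comp(cur, best_val)) {
          *best = k;
        }
      }
    }
  }
}

Passing std::greater<T>() reproduces the argmax path above and std::less<T>() the argmin path.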

View File

@ -28,7 +28,7 @@ __global__ void ComputeArgCUDAKernel(
const Reducer reducer,
const T init,
const T* X,
TIndex* Y) {
int64_t* Y) {
__shared__ typename BlockReduce<int, T>::TempStorage temp_storage;
const int d = stride.d();
for (int idx = blockIdx.x; idx < outer_size; idx += gridDim.x) {
@ -41,7 +41,7 @@ __global__ void ComputeArgCUDAKernel(
}
kv = BlockReduce<int, T>(temp_storage).Reduce(kv, reducer);
if (threadIdx.x == 0) {
Y[idx] = static_cast<TIndex>(kv.key);
Y[idx] = static_cast<int64_t>(kv.key);
}
__syncthreads();
}
@ -56,7 +56,7 @@ bool ArgMaxReducer<CUDAContext>::operator()(
const int next_size,
const int n,
const T* X,
TIndex* Y,
int64_t* Y,
CUDAContext* context) const {
const int outer_size = prev_size * next_size;
const FixedDivisor<int> stride(next_size);
@ -82,7 +82,7 @@ bool ArgMinReducer<CUDAContext>::operator()(
const int next_size,
const int n,
const T* X,
TIndex* Y,
int64_t* Y,
CUDAContext* context) const {
const int outer_size = prev_size * next_size;
const FixedDivisor<int> stride(next_size);

View File

@ -60,7 +60,7 @@ class ArgOp final : public Operator<Context> {
next_size,
n,
X.template data<T>(),
Y->template mutable_data<TIndex>(),
Y->template mutable_data<int64_t>(),
&context_);
}
@ -78,7 +78,7 @@ struct ArgMaxReducer {
const int next_size,
const int n,
const T* X,
TIndex* Y,
int64_t* Y,
Context* context) const;
};
@ -90,7 +90,7 @@ struct ArgMinReducer {
const int next_size,
const int n,
const T* X,
TIndex* Y,
int64_t* Y,
Context* context) const;
};

View File

@ -22,7 +22,7 @@ class AssertOp final : public Operator<Context> {
cmp_tensor_.CopyFrom(Input(0));
auto* cmp_data = cmp_tensor_.template data<T>();
for (TIndex i = 0; i < cmp_tensor_.size(); ++i) {
for (int64_t i = 0; i < cmp_tensor_.size(); ++i) {
CAFFE_ENFORCE((bool)cmp_data[i], [&]() {
std::stringstream ss;
ss << "Assert failed for element " << i

View File

@ -29,8 +29,8 @@ class AtomicFetchAddOp final : public Operator<CPUContext> {
auto& b = Input(2);
auto* c = Output(0);
auto* d = Output(1);
c->Resize(std::vector<TIndex>());
d->Resize(std::vector<TIndex>());
c->Resize(std::vector<int64_t>());
d->Resize(std::vector<int64_t>());
auto* aPtr = a.data<int32_t>();
auto* bPtr = b.data<int32_t>();
auto* cPtr = c->template mutable_data<int32_t>();

View File

@ -105,7 +105,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
zeros_.clear();
nonzeros_.reserve(D);
zeros_.reserve(D);
for (TIndex j = 0; j < D; j++) {
for (int64_t j = 0; j < D; j++) {
if (lambda1_ptr[j] == 0) {
zeros_.push_back(j);
} else {
@ -121,7 +121,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
// rows by replicating the input parameters K times. Then finish row-by-row.
TypedCachedBuffers<T>& b = GetBuffers<T>();
if (nonzeros_.size() == D) {
TIndex i = 0;
int64_t i = 0;
if (K > 1) {
TileArrayIntoVector(lambda1_ptr, D, K, &b.lambda1_);
TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_);
@ -142,7 +142,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
}
} else if (zeros_.size() == D) {
TIndex i = 0;
int64_t i = 0;
if (K > 1) {
TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_z_);
DCHECK_EQ(K * D, b.lambda2_z_.size());
@ -169,7 +169,7 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
PackV(nonzeros_.size(), lambda2_ptr, nonzeros_.data(), b.lambda2_.data());
PackV(zeros_.size(), lambda2_ptr, zeros_.data(), b.lambda2_z_.data());
TIndex i = 0;
int64_t i = 0;
b.accumulator_.resize(std::max(nonzeros_.size(), zeros_.size()));
if (K > 1) {
// Truncate to original size, and re-tile with offsets this time.
@ -219,15 +219,15 @@ bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
template <>
template <typename T>
void BatchBoxCoxOp<CPUContext>::BoxCoxNaive(
TIndex N,
TIndex D,
int64_t N,
int64_t D,
const T* data_ptr,
const T* lambda1_ptr,
const T* lambda2_ptr,
T k_eps,
T* output_ptr) {
for (TIndex i = 0; i < N; i++) {
for (TIndex j = 0; j < D; j++, data_ptr++, output_ptr++) {
for (int64_t i = 0; i < N; i++) {
for (int64_t j = 0; j < D; j++, data_ptr++, output_ptr++) {
T lambda1_v = lambda1_ptr[j];
T lambda2_v = lambda2_ptr[j];
T tmp = std::max(*data_ptr + lambda2_v, k_eps);
@ -245,18 +245,18 @@ void BatchBoxCoxOp<CPUContext>::BoxCoxNaive(
template <>
template <typename T>
void BatchBoxCoxOp<CPUContext>::BoxCoxNonzeroLambda(
TIndex D,
int64_t D,
const T* data_ptr,
const T* lambda1,
const T* lambda2,
T k_eps,
T* out) {
caffe2::math::Add(D, data_ptr, lambda2, out, &context_);
for (TIndex j = 0; j < D; j++) {
for (int64_t j = 0; j < D; j++) {
out[j] = std::max(out[j], k_eps);
}
Pow(D, out, lambda1, out);
for (TIndex j = 0; j < D; j++) {
for (int64_t j = 0; j < D; j++) {
out[j] -= 1.0;
}
caffe2::math::Div(D, out, lambda1, out, &context_);
@ -265,13 +265,13 @@ void BatchBoxCoxOp<CPUContext>::BoxCoxNonzeroLambda(
template <>
template <typename T>
void BatchBoxCoxOp<CPUContext>::BoxCoxZeroLambda(
TIndex D,
int64_t D,
const T* data_ptr,
const T* lambda2,
T k_eps,
T* output_ptr) {
caffe2::math::Add(D, data_ptr, lambda2, output_ptr, &context_);
for (TIndex j = 0; j < D; j++) {
for (int64_t j = 0; j < D; j++) {
output_ptr[j] = std::max(output_ptr[j], k_eps);
}
caffe2::math::Log(D, output_ptr, output_ptr, &context_);
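Taken together, BoxCoxNonzeroLambda and BoxCoxZeroLambda implement the usual per-feature Box-Cox transform. A hedged standalone sketch of the element-wise math (plain C++, no MKL), with the loop counter typed int64_t as in the updated code; BoxCoxRowSketch is a made-up name.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Box-Cox transform of one row of D features:
//   lambda1 == 0:  y = log(max(x + lambda2, k_eps))
//   lambda1 != 0:  y = (max(x + lambda2, k_eps)^lambda1 - 1) / lambda1
template <typename T>
void BoxCoxRowSketch(int64_t D, const T* x, const T* lambda1,
                     const T* lambda2, T k_eps, T* y) {
  for (int64_t j = 0; j < D; ++j) {
    const T shifted = std::max(x[j] + lambda2[j], k_eps);
    y[j] = (lambda1[j] == T(0))
        ? std::log(shifted)
        : (std::pow(shifted, lambda1[j]) - T(1)) / lambda1[j];
  }
}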

View File

@ -27,8 +27,8 @@ class BatchBoxCoxOp final : public Operator<Context> {
protected:
template <typename T>
void BoxCoxNaive(
TIndex N,
TIndex D,
int64_t N,
int64_t D,
const T* data_ptr,
const T* lambda1_ptr,
const T* lambda2_ptr,
@ -38,7 +38,7 @@ class BatchBoxCoxOp final : public Operator<Context> {
#ifdef CAFFE2_USE_MKL
template <typename T>
void BoxCoxNonzeroLambda(
TIndex D,
int64_t D,
const T* data_ptr,
const T* lambda1,
const T* lambda2,
@ -47,7 +47,7 @@ class BatchBoxCoxOp final : public Operator<Context> {
template <typename T>
void BoxCoxZeroLambda(
TIndex D,
int64_t D,
const T* data_ptr,
const T* lambda2,
T k_eps,

View File

@ -26,21 +26,21 @@ bool BatchBucketizeOp<CPUContext>::RunOnDevice() {
auto feature_dim = feature.dim(1);
auto output_dim = indices.size();
TIndex length_sum = 0;
for (TIndex i = 0; i < lengths.size(); i++) {
int64_t length_sum = 0;
for (int64_t i = 0; i < lengths.size(); i++) {
CAFFE_ENFORCE_GE(feature_dim, indices_data[i]);
length_sum += lengths_data[i];
}
CAFFE_ENFORCE_EQ(length_sum, boundaries.size());
TIndex lower_bound = 0;
int64_t lower_bound = 0;
output->Resize(batch_size, output_dim);
auto* output_data = output->template mutable_data<int32_t>();
for (TIndex i = 0; i < batch_size; i++) {
for (int64_t i = 0; i < batch_size; i++) {
lower_bound = 0;
for (TIndex j = 0; j < output_dim; j++) {
for (TIndex k = 0; k <= lengths_data[j]; k++) {
for (int64_t j = 0; j < output_dim; j++) {
for (int64_t k = 0; k <= lengths_data[j]; k++) {
if (k == lengths_data[j] ||
feature_data[i * feature_dim + indices_data[j]] <=
boundaries_data[lower_bound + k]) {

View File

@ -41,7 +41,7 @@ bool BatchGatherOp<CUDAContext>::DoRunWithType() {
auto& indices = Input(INDICES);
auto* output = Output(0);
vector<TIndex> shape;
vector<int64_t> shape;
shape.push_back(data.dim(0));
shape.insert(shape.end(), indices.dims().begin(), indices.dims().end());
shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end());

View File

@ -26,7 +26,7 @@ class BatchGatherOp final : public Operator<Context> {
CAFFE_ENFORCE_GE(data.ndim(), 2, "DATA should be at least 2-D");
vector<TIndex> shape;
vector<int64_t> shape;
shape.push_back(data.dim(0));
shape.insert(shape.end(), indices.dims().begin(), indices.dims().end());
shape.insert(shape.end(), data.dims().begin() + 2, data.dims().end());

View File

@ -27,16 +27,16 @@ vector<TensorShape> TensorInferenceForBatchMatMul(
b_dim1 = in[1].dims(ndim - 1);
}
auto output_dims = vector<TIndex>{in[0].dims().begin(), in[0].dims().end()};
auto output_dims = vector<int64_t>{in[0].dims().begin(), in[0].dims().end()};
output_dims[ndim - 2] = a_dim0;
output_dims[ndim - 1] = b_dim1;
return vector<TensorShape>{
CreateTensorShape(vector<TIndex>{output_dims}, in[0].data_type())};
CreateTensorShape(vector<int64_t>{output_dims}, in[0].data_type())};
} else {
auto ndims_A = in[0].dims_size();
auto ndims_B = in[1].dims_size();
std::vector<TIndex> dims_A(ndims_A), dims_B(ndims_B);
std::vector<int64_t> dims_A(ndims_A), dims_B(ndims_B);
for (int i = 0; i < ndims_A; ++i) {
dims_A[i] = in[0].dims(i);
}
@ -66,7 +66,7 @@ vector<TensorShape> TensorInferenceForBatchMatMul(
N = dims_B[ndims_B - 1];
}
std::vector<TIndex> new_dims;
std::vector<int64_t> new_dims;
if (ndims_A >= ndims_B) {
new_dims.assign(dims_A.begin(), dims_A.end() - 2);
} else {
@ -82,7 +82,7 @@ vector<TensorShape> TensorInferenceForBatchMatMul(
new_dims.push_back(1);
}
return vector<TensorShape>{
CreateTensorShape(vector<TIndex>{new_dims}, in[0].data_type())};
CreateTensorShape(vector<int64_t>{new_dims}, in[0].data_type())};
}
}

View File

@ -175,7 +175,7 @@ class BatchMatMulOp final : public Operator<Context> {
// Calculate output tensor shapes [B..., (M), (N)]
// Batch dimensions will be broadcasted out to those of the longer tensor
// A or B. Either M or N are optional if A or B, respectively are 1-D.
std::vector<TIndex> new_dims;
std::vector<int64_t> new_dims;
if (ndims_A >= ndims_B) {
new_dims.assign(dims_A.begin(), dims_A.end() - 2);
} else {

View File

@ -26,7 +26,7 @@ class BatchMatMulOpGPUTest : public testing::Test {
}
void AddConstInput(
const std::vector<TIndex>& dims,
const std::vector<int64_t>& dims,
const float value,
const string& name) {
Blob* blob = ws_.CreateBlob(name);
@ -39,7 +39,7 @@ class BatchMatMulOpGPUTest : public testing::Test {
cuda_context_.get());
}
void VerifyOutput(const std::vector<TIndex>& dims, const float value) const {
void VerifyOutput(const std::vector<int64_t>& dims, const float value) const {
const Blob* Y_blob = ws_.GetBlob("Y");
ASSERT_NE(nullptr, Y_blob);
const auto& Y = Y_blob->Get<Tensor>();
@ -64,12 +64,12 @@ TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUNormalTest) {
if (!HasCudaGPU()) {
return;
}
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<TIndex>{3, 10, 6}, 1.0f, "B");
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<int64_t>{3, 10, 6}, 1.0f, "B");
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
ASSERT_NE(nullptr, op);
ASSERT_TRUE(op->Run());
VerifyOutput(std::vector<TIndex>{3, 5, 6}, 10.0f);
VerifyOutput(std::vector<int64_t>{3, 5, 6}, 10.0f);
}
TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) {
@ -79,12 +79,12 @@ TEST_F(BatchMatMulOpGPUTest, BatchMatMulOpGPUBroadcastTest) {
auto* arg = def_.add_arg();
arg->set_name("broadcast");
arg->set_i(1);
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<TIndex>{2, 3, 10, 6}, 1.0f, "B");
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<int64_t>{2, 3, 10, 6}, 1.0f, "B");
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
ASSERT_NE(nullptr, op);
ASSERT_TRUE(op->Run());
VerifyOutput(std::vector<TIndex>{2, 3, 5, 6}, 10.0f);
VerifyOutput(std::vector<int64_t>{2, 3, 5, 6}, 10.0f);
}
} // namespace

View File

@ -20,7 +20,7 @@ class BatchMatMulOpTest : public testing::Test {
}
void AddConstInput(
const std::vector<TIndex>& dims,
const std::vector<int64_t>& dims,
const float value,
const string& name) {
Blob* blob = ws_.CreateBlob(name);
@ -33,7 +33,7 @@ class BatchMatMulOpTest : public testing::Test {
cpu_context_.get());
}
void VerifyOutput(const std::vector<TIndex>& dims, const float value) const {
void VerifyOutput(const std::vector<int64_t>& dims, const float value) const {
const Blob* Y_blob = ws_.GetBlob("Y");
ASSERT_NE(nullptr, Y_blob);
const auto& Y = Y_blob->Get<TensorCPU>();
@ -54,24 +54,24 @@ class BatchMatMulOpTest : public testing::Test {
};
TEST_F(BatchMatMulOpTest, BatchMatMulOpNormalTest) {
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<TIndex>{3, 10, 6}, 1.0f, "B");
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<int64_t>{3, 10, 6}, 1.0f, "B");
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
ASSERT_NE(nullptr, op);
ASSERT_TRUE(op->Run());
VerifyOutput(std::vector<TIndex>{3, 5, 6}, 10.0f);
VerifyOutput(std::vector<int64_t>{3, 5, 6}, 10.0f);
}
TEST_F(BatchMatMulOpTest, BatchMatMulOpBroadcastTest) {
auto* arg = def_.add_arg();
arg->set_name("broadcast");
arg->set_i(1);
AddConstInput(std::vector<TIndex>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<TIndex>{2, 3, 10, 6}, 1.0f, "B");
AddConstInput(std::vector<int64_t>{3, 5, 10}, 1.0f, "A");
AddConstInput(std::vector<int64_t>{2, 3, 10, 6}, 1.0f, "B");
std::unique_ptr<OperatorBase> op(CreateOperator(def_, &ws_));
ASSERT_NE(nullptr, op);
ASSERT_TRUE(op->Run());
VerifyOutput(std::vector<TIndex>{2, 3, 5, 6}, 10.0f);
VerifyOutput(std::vector<int64_t>{2, 3, 5, 6}, 10.0f);
}
} // namespace

View File

@ -14,15 +14,15 @@ bool BatchSparseToDenseOp<T, Context>::RunOnDevice() {
CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
CAFFE_ENFORCE_EQ(indices.ndim(), 1);
const TIndex* lengths_data = lengths.template data<TIndex>();
const TIndex* indices_data = indices.template data<TIndex>();
const int64_t* lengths_data = lengths.template data<int64_t>();
const int64_t* indices_data = indices.template data<int64_t>();
const T* values_data = values.template data<T>();
TIndex batch_size = lengths.size();
TIndex lengths_sum = 0;
math::Sum<TIndex, Context>(batch_size, lengths_data, &lengths_sum, &context_);
int64_t batch_size = lengths.size();
int64_t lengths_sum = 0;
math::Sum<int64_t, Context>(batch_size, lengths_data, &lengths_sum, &context_);
CAFFE_ENFORCE_EQ(lengths_sum, indices.size());
vector<TIndex> output_shape = {batch_size};
vector<int64_t> output_shape = {batch_size};
if (InputSize() == 4) {
auto& shaper = Input(3);
CAFFE_ENFORCE_EQ(shaper.ndim(), 2);
@ -42,9 +42,9 @@ bool BatchSparseToDenseOp<T, Context>::RunOnDevice() {
math::Set(
output->size(), static_cast<T>(default_value_), output_data, &context_);
TIndex k = 0;
for (TIndex i = 0; i < batch_size; ++i) {
for (TIndex j = 0; j < lengths_data[i]; ++j) {
int64_t k = 0;
for (int64_t i = 0; i < batch_size; ++i) {
for (int64_t j = 0; j < lengths_data[i]; ++j) {
CAFFE_ENFORCE(
indices_data[k] < dense_last_dim_,
"An indice (",
@ -69,24 +69,24 @@ bool BatchDenseToSparseOp<T, Context>::RunOnDevice() {
CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
CAFFE_ENFORCE_EQ(indices.ndim(), 1);
CAFFE_ENFORCE_EQ(dense.ndim(), 2);
const TIndex* lengths_data = lengths.template data<TIndex>();
const TIndex* indices_data = indices.template data<TIndex>();
const int64_t* lengths_data = lengths.template data<int64_t>();
const int64_t* indices_data = indices.template data<int64_t>();
const T* dense_data = dense.template data<T>();
TIndex batch_size = lengths.size();
TIndex lengths_sum = 0;
math::Sum<TIndex, Context>(batch_size, lengths_data, &lengths_sum, &context_);
int64_t batch_size = lengths.size();
int64_t lengths_sum = 0;
math::Sum<int64_t, Context>(batch_size, lengths_data, &lengths_sum, &context_);
CAFFE_ENFORCE_EQ(lengths_sum, indices.size());
CAFFE_ENFORCE_EQ(batch_size, dense.dim(0));
dense_last_dim_ = dense.dim(1);
vector<TIndex> output_shape = indices.dims();
vector<int64_t> output_shape = indices.dims();
output->Resize(output_shape);
T* output_data = output->template mutable_data<T>();
TIndex k = 0;
for (TIndex i = 0; i < batch_size; ++i) {
for (TIndex j = 0; j < lengths_data[i]; ++j) {
int64_t k = 0;
for (int64_t i = 0; i < batch_size; ++i) {
for (int64_t j = 0; j < lengths_data[i]; ++j) {
CAFFE_ENFORCE(
indices_data[k] < dense.dim(1),
"An indice (",

View File

@ -15,12 +15,12 @@ class BatchSparseToDenseOp : public Operator<Context> {
USE_OPERATOR_CONTEXT_FUNCTIONS;
BatchSparseToDenseOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
OP_SINGLE_ARG(TIndex, "dense_last_dim", dense_last_dim_, -1),
OP_SINGLE_ARG(int64_t, "dense_last_dim", dense_last_dim_, -1),
OP_SINGLE_ARG(T, "default_value", default_value_, static_cast<T>(0)) {}
bool RunOnDevice() override;
private:
TIndex dense_last_dim_;
int64_t dense_last_dim_;
T default_value_;
INPUT_TAGS(LENGTHS, INDICES, VALUES);
};
@ -34,7 +34,7 @@ class BatchDenseToSparseOp : public Operator<Context> {
bool RunOnDevice() override;
private:
TIndex dense_last_dim_;
int64_t dense_last_dim_;
INPUT_TAGS(LENGTHS, INDICES, DENSE);
};

View File

@ -138,7 +138,7 @@ bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
}
}
CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector<TIndex>{batch_size, 3}));
CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector<int64_t>{batch_size, 3}));
Eigen::Map<const ERArrXXf> iminfo(
iminfo_in.data<float>(), iminfo_in.dim(0), iminfo_in.dim(1));

View File

@ -62,7 +62,7 @@ bool BooleanMaskOp<CPUContext>::RunOnDevice() {
++numOutputs;
}
}
std::vector<TIndex> outShape;
std::vector<int64_t> outShape;
outShape.push_back(numOutputs);
outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end());
dataOut->Resize(outShape);
@ -81,11 +81,11 @@ bool BooleanMaskOp<CPUContext>::RunOnDevice() {
const auto innerSize = data.size_from_dim(1);
const auto innerSizeBytes = innerSize * data.meta().itemsize();
TIndex lastStart = -1;
int64_t lastStart = -1;
const auto* inPtr = (char*)data.raw_data();
TIndex outStart = 0;
int64_t outStart = 0;
for (TIndex i = 0;; ++i) {
for (int64_t i = 0;; ++i) {
// mask was true and either a) became false, or b) sequence finished
if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
const auto* src = inPtr + lastStart * innerSizeBytes;
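A standalone sketch of the run-based copy this hunk is from (illustration only, using memcpy in place of context_.CopyItemsSameDevice): rows whose mask entry is true are gathered as contiguous runs, and each run is moved with one call, with the bookkeeping indices now typed int64_t. MaskRowsSketch is a made-up name.

#include <cstdint>
#include <cstring>

// Copy the rows of `in` whose mask entry is true into `out`, batching
// contiguous runs of selected rows into single memcpy calls.
inline void MaskRowsSketch(const bool* mask, int64_t outerSize,
                           const char* in, int64_t rowBytes, char* out) {
  int64_t runStart = -1;  // first row of the current run of true entries
  int64_t outRow = 0;     // next free row in the output
  for (int64_t i = 0;; ++i) {
    // A run ends when the mask turns false or the input is exhausted.
    if (runStart != -1 && (i >= outerSize || !mask[i])) {
      const int64_t numRows = i - runStart;
      std::memcpy(out + outRow * rowBytes, in + runStart * rowBytes,
                  numRows * rowBytes);
      outRow += numRows;
      runStart = -1;
    }
    if (i >= outerSize) {
      break;
    }
    if (runStart == -1 && mask[i]) {
      runStart = i;
    }
  }
}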

View File

@ -7,15 +7,15 @@ namespace caffe2 {
namespace {
__global__ void BooleanMaskCopyKernel(
const TIndex numOfOutput,
const TIndex numBytes,
const TIndex* indices,
const int64_t numOfOutput,
const int64_t numBytes,
const int64_t* indices,
const uint8_t* src,
uint8_t* dest) {
for (TIndex i = blockIdx.x; i < numOfOutput; i += gridDim.x) {
for (int64_t i = blockIdx.x; i < numOfOutput; i += gridDim.x) {
const auto srcBase = indices[i] * numBytes;
const auto destBase = i * numBytes;
for (TIndex j = threadIdx.x; j < numBytes; j += blockDim.x) {
for (int64_t j = threadIdx.x; j < numBytes; j += blockDim.x) {
dest[destBase + j] = src[srcBase + j];
}
}
@ -40,7 +40,7 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
const auto* maskData = mask.data<bool>();
const auto outerSize = mask.dims()[0];
indices_.Resize(outerSize);
auto* indicesData = indices_.mutable_data<TIndex>();
auto* indicesData = indices_.mutable_data<int64_t>();
size_t numBytes = 0;
cub::CountingInputIterator<int> itr(0);
@ -50,16 +50,16 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
itr,
maskData,
indicesData,
static_cast<TIndex*>(nullptr),
static_cast<int64_t*>(nullptr),
outerSize,
context_.cuda_stream());
auto numTIndex =
static_cast<TIndex>((numBytes + sizeof(TIndex) - 1) / sizeof(TIndex));
// allocate one more TIndex at the end of scratch for storing numOfOutput
scratch_.Resize(numTIndex + 1);
auto* scratchData = scratch_.mutable_data<TIndex>();
auto* numOfOutputData = scratchData + numTIndex;
auto numint64_t =
static_cast<int64_t>((numBytes + sizeof(int64_t) - 1) / sizeof(int64_t));
// allocate one more int64_t at the end of scratch for storing numOfOutput
scratch_.Resize(numint64_t + 1);
auto* scratchData = scratch_.mutable_data<int64_t>();
auto* numOfOutputData = scratchData + numint64_t;
cub::DeviceSelect::Flagged(
static_cast<void*>(scratchData),
@ -72,11 +72,11 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
context_.cuda_stream());
// Copy numOfOutput from gpu to cpu
TIndex numOfOutput;
int64_t numOfOutput;
context_.CopyToCPU(1, numOfOutputData, &numOfOutput);
indices_.Resize(numOfOutput);
std::vector<TIndex> dims = src.dims();
std::vector<int64_t> dims = src.dims();
dims[0] = numOfOutput;
dest->Resize(dims);
auto* destData = (uint8_t*)dest->raw_mutable_data(src.meta());
@ -84,12 +84,12 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
if (OutputSize() == 2) {
auto* indicesOut = Output(1);
indicesOut->Resize(numOfOutput);
indicesOut->template mutable_data<TIndex>();
indicesOut->template mutable_data<int64_t>();
}
if (numOfOutput > 0) {
BooleanMaskCopyKernel<<<
min(numOfOutput, static_cast<TIndex>(CAFFE_MAXIMUM_NUM_BLOCKS)),
min(numOfOutput, static_cast<int64_t>(CAFFE_MAXIMUM_NUM_BLOCKS)),
CAFFE_CUDA_NUM_THREADS,
0,
context_.cuda_stream()>>>(

View File

@ -18,10 +18,10 @@ static void AddScalarInput(
Blob* blob = ws->CreateBlob(name);
auto* tensor = blob->GetMutableTensor(CPU);
if (!isEmpty) {
tensor->Resize(vector<TIndex>{1});
tensor->Resize(vector<int64_t>{1});
*(tensor->template mutable_data<DataT>()) = value;
} else {
tensor->Resize(vector<TIndex>{0});
tensor->Resize(vector<int64_t>{0});
tensor->template mutable_data<DataT>();
}
return;

View File

@ -11,7 +11,7 @@ bool CastOp<CPUContext>::DoRunWithType() {
const auto* data = input.template data<SrcType>();
auto* out = output->template mutable_data<DstType>();
auto N = input.size();
for (TIndex i = 0; i < N; ++i) {
for (int64_t i = 0; i < N; ++i) {
out[i] = static_cast<DstType>(data[i]);
}
return true;

View File

@ -42,7 +42,7 @@ class CastOp : public Operator<Context> {
const auto* data = input.template data<SrcType>();
auto* out = output->template mutable_data<DstType>();
auto N = input.size();
for (TIndex i = 0; i < N; ++i) {
for (int64_t i = 0; i < N; ++i) {
out[i] = static_cast<DstType>(data[i]);
}
return true;

View File

@ -161,7 +161,7 @@ bool SplitOp<Context>::RunOnDevice() {
input_channels,
"Sum of split dimensions do not match: should be ",
input_channels);
vector<TIndex> output_dims(input.dims());
vector<int64_t> output_dims(input.dims());
int before = 1, after = 1;
for (int i = 0; i < canonical_axis; ++i) {
before *= input.dim32(i);
@ -215,7 +215,7 @@ bool SplitByLengthsOp<Context>::RunOnDevice() {
input_channels,
"Sum of split dimensions do not match: should be ",
input_channels);
vector<TIndex> output_dims(input.dims());
vector<int64_t> output_dims(input.dims());
int before = input.size_to_dim(canonical_axis);
int after = input.size_from_dim(canonical_axis + 1);
size_t input_offset = 0;
@ -245,7 +245,7 @@ template <class Context>
bool ConcatOp<Context>::RunOnDevice() {
auto* output = Output(0);
Tensor* split = this->template Output<Tensor>(1, CPU);
split->Resize(vector<TIndex>(1, InputSize()));
split->Resize(vector<int64_t>(1, InputSize()));
int* axis_data = split->template mutable_data<int>();
auto& input_zero = Input(0);
int adj_size = input_zero.ndim() + (add_axis_ ? 1 : 0);
@ -263,7 +263,7 @@ bool ConcatOp<Context>::RunOnDevice() {
}
int before = 1, after = 1;
vector<TIndex> output_dims(input_zero.dims());
vector<int64_t> output_dims(input_zero.dims());
for (int i = 0; i < input_zero.ndim(); ++i) {
if (i == canonical_axis && !add_axis_) {
continue;

View File

@ -31,7 +31,7 @@ bool ConditionalOp<CPUContext>::RunOnDevice() {
// perform conditional op along first dimension
const auto* ptrT = (char*)dataT.raw_data();
const auto* ptrF = (char*)dataF.raw_data();
for (TIndex i = 0; i < condition.size(); i++) {
for (int64_t i = 0; i < condition.size(); i++) {
auto* dst = outPtr + i * innerSizeBytes;
if (condPtr[i]) {
context_.CopyItemsSameDevice(

View File

@ -16,8 +16,8 @@ class AlgorithmsCache {
// combination of tensor dimensions & compute data type.
//
TAlgorithm getAlgorithm(
const std::vector<TIndex>& tensorDimensions1,
const std::vector<TIndex>& tensorDimensions2,
const std::vector<int64_t>& tensorDimensions1,
const std::vector<int64_t>& tensorDimensions2,
int algorithmFlags, // Differentiate between algorithms with different
// parameters in a generic way
std::function<TAlgorithm()> generatingFunc);
@ -28,14 +28,14 @@ class AlgorithmsCache {
template <typename TAlgorithm>
TAlgorithm AlgorithmsCache<TAlgorithm>::getAlgorithm(
const std::vector<TIndex>& tensorDimensions1,
const std::vector<TIndex>& tensorDimensions2,
const std::vector<int64_t>& tensorDimensions1,
const std::vector<int64_t>& tensorDimensions2,
int algorithmFlags,
std::function<TAlgorithm()> generatingFunc) {
int64_t seed = 0;
// Hash all of the inputs, which we will then use to try and look up
// a previously discovered algorithm, or fall back to generating a new one.
std::hash<TIndex> hashFn;
std::hash<int64_t> hashFn;
for (const auto num : tensorDimensions1) {
// Copied from boost::hash_combine.
// Adding 1 to differentiate between first and second vector.

View File

@ -12,11 +12,11 @@ namespace caffe2 {
TEST(AlgorithmsCacheTest, CachesCorrectly) {
AlgorithmsCache<int> cache;
int result = cache.getAlgorithm(
std::vector<TIndex>(1), std::vector<TIndex>(1), 0, []() { return 5; });
std::vector<int64_t>(1), std::vector<int64_t>(1), 0, []() { return 5; });
EXPECT_EQ(result, 5);
int res2 = cache.getAlgorithm(
std::vector<TIndex>(1), std::vector<TIndex>(1), 0, []() { return 10; });
std::vector<int64_t>(1), std::vector<int64_t>(1), 0, []() { return 10; });
EXPECT_EQ(res2, 5);
}
@ -24,11 +24,11 @@ TEST(AlgorithmsCacheTest, CachesCorrectly) {
TEST(AlgorithmsCacheTest, KeysDifferIfOneVectorIsEmpty) {
AlgorithmsCache<int> cache;
int result = cache.getAlgorithm(
std::vector<TIndex>(1, 10), std::vector<TIndex>(), 0, []() { return 5; });
std::vector<int64_t>(1, 10), std::vector<int64_t>(), 0, []() { return 5; });
EXPECT_EQ(result, 5);
int res2 = cache.getAlgorithm(
std::vector<TIndex>(), std::vector<TIndex>(1, 10), 0, []() {
std::vector<int64_t>(), std::vector<int64_t>(1, 10), 0, []() {
return 10;
});
@ -38,20 +38,20 @@ TEST(AlgorithmsCacheTest, KeysDifferIfOneVectorIsEmpty) {
TEST(AlgorithmsCacheTest, KeysDifferIfFlagsAreDifferent) {
AlgorithmsCache<int> cache;
int result = cache.getAlgorithm(
std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 123, []() {
std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 123, []() {
return 5;
});
EXPECT_EQ(result, 5);
int res2 = cache.getAlgorithm(
std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 456, []() {
std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 456, []() {
return 10;
});
EXPECT_EQ(res2, 10);
int res3 = cache.getAlgorithm(
std::vector<TIndex>{2, 3, 4}, std::vector<TIndex>{5, 6}, 456, []() {
std::vector<int64_t>{2, 3, 4}, std::vector<int64_t>{5, 6}, 456, []() {
return 15;
});

View File

@ -411,8 +411,8 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
}
}
vector<TIndex> cudnn_input_dims_;
vector<TIndex> cudnn_filter_dims_;
vector<int64_t> cudnn_input_dims_;
vector<int64_t> cudnn_filter_dims_;
CuDNNWrapper cudnn_wrapper_;
cudnnTensorDescriptor_t bottom_desc_;

View File

@ -42,10 +42,10 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
CAFFE_ENFORCE(filter.dim32(2) == kernel_h());
CAFFE_ENFORCE(filter.dim32(3) == kernel_w());
ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
Eigen::array<TIndex, 4> kernel_shuffles
{ {TIndex(2), TIndex(3), TIndex(1), TIndex(0)} };
Eigen::array<TIndex, 4> input_shuffles
{ {TIndex(0), TIndex(2), TIndex(3), TIndex(1)} };
Eigen::array<int64_t, 4> kernel_shuffles
{ {int64_t(2), int64_t(3), int64_t(1), int64_t(0)} };
Eigen::array<int64_t, 4> input_shuffles
{ {int64_t(0), int64_t(2), int64_t(3), int64_t(1)} };
Eigen::Tensor<T, 4, Eigen::RowMajor> filter_tensor =
Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
@ -109,14 +109,14 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
// It seems that the bias broadcast is still slower so let's do the
// following for now.
EigenArrayMap<T> Y_arr(
Y_tensor.data(), static_cast<TIndex>(M), Y->size() / M);
Y_tensor.data(), static_cast<int64_t>(M), Y->size() / M);
ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
Y_arr = Y_arr.colwise() + bias_arr;
}
// Do a last transpose.
Eigen::array<TIndex, 4> output_shuffles
{ {TIndex(0), TIndex(3), TIndex(1), TIndex(2) } };
Eigen::array<int64_t, 4> output_shuffles
{ {int64_t(0), int64_t(3), int64_t(1), int64_t(2) } };
Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
Y->template mutable_data<T>(), N, M, Y->dim32(2), Y->dim32(3)) =
@ -204,7 +204,7 @@ bool EigenConvOp<T>::RunOnDeviceWithOrderNHWC() {
// It seems that the bias broadcast is still slower so let's do the
// following for now.
EigenArrayMap<T> Y_arr(
Y->template mutable_data<T>(), static_cast<TIndex>(M), Y->size() / M);
Y->template mutable_data<T>(), static_cast<int64_t>(M), Y->size() / M);
ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
Y_arr = Y_arr.colwise() + bias_arr;
}

View File

@ -240,7 +240,7 @@ bool ConvOp<T, Context>::RunOnDeviceWithOrderNHWC() {
}
auto f = [&](Tensor* col_buffer) {
col_buffer->Resize(
vector<TIndex>{Y->dim32(1), Y->dim32(2), kernel_h(), kernel_w(), C});
vector<int64_t>{Y->dim32(1), Y->dim32(2), kernel_h(), kernel_w(), C});
T* col_buffer_data = col_buffer->template mutable_data<T>();
// Im2Col, followed by gemm.
for (int image_id = 0; image_id < N; ++image_id) {
@ -504,7 +504,7 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
dbias->Resize(M);
if (bias_multiplier_.size() != output_image_size) {
// If the helper bias multiplier is not M, reshape and fill it with one.
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),
@ -689,7 +689,7 @@ bool ConvGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
if (bias_multiplier_.size() != output_image_size) {
// If the helper bias multiplier is not M, reshape and fill it with one.
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),

View File

@ -246,7 +246,7 @@ class ConvPoolOpBase : public Operator<Context> {
// Helper function that is also called from OperatorSchema. Modifies
// kernel parameters and outputs output_dims and channel_first.
static inline void InferOutputSize(
vector<TIndex> input_dims,
vector<int64_t> input_dims,
int /*output_channel*/,
StorageOrder order,
bool global_pooling,
@ -259,7 +259,7 @@ class ConvPoolOpBase : public Operator<Context> {
vector<int>& pads,
bool& channel_first) {
channel_first = false; // initialized to suppress compiler warning.
vector<TIndex> dims;
vector<int64_t> dims;
switch (order) {
case StorageOrder::NHWC:
channel_first = false;
@ -358,7 +358,7 @@ class ConvPoolOpBase : public Operator<Context> {
if (bias_multiplier_->size() != size) {
// If the helper bias multiplier is not image size, reshape and fill it
// with one.
bias_multiplier_->Resize(std::vector<TIndex>{size});
bias_multiplier_->Resize(std::vector<int64_t>{size});
math::Set<T, Context>(
size,
static_cast<T>(1),

View File

@ -64,8 +64,8 @@ class CudnnConvTransposeOpBase : public ConvTransposeUnpoolBase<CUDAContext> {
}
protected:
vector<TIndex> cudnn_input_dims_;
vector<TIndex> cudnn_filter_dims_;
vector<int64_t> cudnn_input_dims_;
vector<int64_t> cudnn_filter_dims_;
CuDNNWrapper cudnn_wrapper_;
cudnnTensorDescriptor_t bottom_desc_;

View File

@ -45,7 +45,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
bias.dim32(0) == C,
"bias dimension must be equal to output channel number");
if (bias_multiplier_.size() != output_image_size) {
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
T* bm_data = bias_multiplier_.template mutable_data<T>();
math::Set<T, Context>(
output_image_size,
@ -61,7 +61,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
auto f = [&](Tensor* col_buffer) {
col_buffer->Resize(
vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W});
T* col_buffer_data = col_buffer->template mutable_data<T>();
for (auto image_id = 0; image_id < N; ++image_id) {
// Weight term
@ -167,7 +167,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
bias.dim32(0) == C,
"bias dimension must be equal to output channel number");
if (bias_multiplier_.size() != output_image_size) {
bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
T* bm_data = bias_multiplier_.template mutable_data<T>();
math::Set<T, Context>(
output_image_size,
@ -182,7 +182,7 @@ bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
auto f = [&](Tensor* /*col_buffer*/) {
col_buffer_.Resize(
vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C});
T* col_buffer_data = col_buffer_.template mutable_data<T>();
for (auto image_id = 0; image_id < N; ++image_id) {
// Weight term
@ -270,7 +270,7 @@ bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
const int output_image_size = dY.dim32(2) * dY.dim32(3);
// The col buffer is stored in CHW order as well
col_buffer_.Resize(
vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W});
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C);
@ -422,7 +422,7 @@ bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
const int output_image_size = dY.dim32(1) * dY.dim32(2);
// The col buffer is stored in HWC order as well
col_buffer_.Resize(
vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C});
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C);

View File

@ -10,7 +10,7 @@
namespace caffe2 {
void AddConstInput(const vector<TIndex>& shape,
void AddConstInput(const vector<int64_t>& shape,
const float value,
const string& name,
Workspace* ws) {
@ -23,7 +23,7 @@ void AddConstInput(const vector<TIndex>& shape,
tensor->size(), value, tensor->template mutable_data<float>(), &context);
}
void AddNoiseInput(const vector<TIndex>& shape,
void AddNoiseInput(const vector<int64_t>& shape,
const string& name,
Workspace* ws) {
DeviceOption option;
@ -81,9 +81,9 @@ void compare(int N, int inputC, int H, int W,
def1.add_arg()->CopyFrom(MakeArgument("adj_h", adjH));
def1.add_arg()->CopyFrom(MakeArgument("adj_w", adjW));
AddNoiseInput(vector<TIndex>{N, inputC, H, W}, "X", &ws);
AddNoiseInput(vector<TIndex>{inputC, outputC, kernelH, kernelW}, "W", &ws);
AddNoiseInput(vector<TIndex>{outputC}, "B", &ws);
AddNoiseInput(vector<int64_t>{N, inputC, H, W}, "X", &ws);
AddNoiseInput(vector<int64_t>{inputC, outputC, kernelH, kernelW}, "W", &ws);
AddNoiseInput(vector<int64_t>{outputC}, "B", &ws);
unique_ptr<OperatorBase> op1(CreateOperator(def1, &ws));
EXPECT_NE(nullptr, op1.get());

View File

@ -80,9 +80,9 @@ bool SigmoidCrossEntropyWithLogitsOp<float, CPUContext>::RunOnDevice() {
auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();
@ -162,9 +162,9 @@ bool WeightedSigmoidCrossEntropyWithLogitsOp<float, CPUContext>::RunOnDevice() {
auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();
@ -260,11 +260,11 @@ bool MakeTwoClassOp<float, CPUContext>::RunOnDevice() {
auto* Y = Output(0);
auto shape = X.dims();
shape.push_back(2);
TIndex N = X.size();
int64_t N = X.size();
Y->Resize(shape);
const auto* Xdata = X.data<float>();
auto* Ydata = Y->template mutable_data<float>();
for (TIndex i = 0; i < N; ++i) {
for (int64_t i = 0; i < N; ++i) {
DCHECK_GE(Xdata[i], 0.0);
DCHECK_LE(Xdata[i], 1.0);
Ydata[i * 2] = 1.0 - Xdata[i];
@ -284,9 +284,9 @@ bool MakeTwoClassGradientOp<float, CPUContext>::RunOnDevice() {
dX->Resize(shape);
const float* dYdata = dY.data<float>();
float* dXdata = dX->template mutable_data<float>();
TIndex N = dX->size();
int64_t N = dX->size();
// use eigen?
for (TIndex i = 0; i < N; ++i) {
for (int64_t i = 0; i < N; ++i) {
dXdata[i] = dYdata[i * 2 + 1] - dYdata[i * 2];
}
return true;
@ -308,7 +308,7 @@ bool CrossEntropyOp<float, CPUContext>::RunOnDevice() {
CAFFE_ENFORCE(
(label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == D));
CAFFE_ENFORCE_EQ(label.dim32(0), N);
Y->Resize(vector<TIndex>{N});
Y->Resize(vector<int64_t>{N});
const float* Xdata = X.data<float>();
const float* labelData = label.data<float>();
auto* Ydata = Y->template mutable_data<float>();

View File

@ -42,7 +42,7 @@ bool LabelCrossEntropyOp<float, CUDAContext>::RunOnDevice() {
CAFFE_ENFORCE(
(label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1));
CAFFE_ENFORCE_EQ(label.dim32(0), N);
Y->Resize(vector<TIndex>(size_t(1), N));
Y->Resize(vector<int64_t>(size_t(1), N));
LabelCrossEntropyKernel<<<
CAFFE_GET_BLOCKS(N),
CAFFE_CUDA_NUM_THREADS,
@ -250,9 +250,9 @@ bool SigmoidCrossEntropyWithLogitsOp<float, CUDAContext>::RunOnDevice() {
auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();
@ -372,9 +372,9 @@ bool WeightedSigmoidCrossEntropyWithLogitsOp<float, CUDAContext>::
auto* out = Output(0);
if (logits.ndim() == 0) {
out->Resize(std::vector<TIndex>{});
out->Resize(std::vector<int64_t>{});
} else {
std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
std::vector<int64_t> dims(logits.dims().begin(), logits.dims().end() - 1);
out->Resize(dims);
}
auto* out_ptr = out->template mutable_data<float>();

View File

@ -32,7 +32,7 @@ bool CTCBeamSearchDecoderOp<CPUContext>::RunOnDevice() {
(InputSize() == 2) ? Input(SEQ_LEN).data<int>() : nullptr;
vector<int32_t> values_cache;
output_len->Resize(vector<TIndex>{batch_size});
output_len->Resize(vector<int64_t>{batch_size});
int* output_len_data = output_len->mutable_data<int>();
for (int32_t i = 0; i < batch_size; ++i) {
@ -121,7 +121,7 @@ bool CTCBeamSearchDecoderOp<CPUContext>::RunOnDevice() {
}
int32_t cache_size = values_cache.size();
values->Resize(vector<TIndex>{cache_size});
values->Resize(vector<int64_t>{cache_size});
int* values_data = values->mutable_data<int>();
for (int i = 0; i < values_cache.size(); ++i) {
values_data[i] = values_cache.at(i);

View File

@ -32,7 +32,7 @@ bool CTCGreedyDecoderOp<CPUContext>::RunOnDevice() {
(InputSize() == 2) ? Input(SEQ_LEN).data<int>() : nullptr;
vector<int> values_cach;
output_len->Resize(vector<TIndex>{batch_size});
output_len->Resize(vector<int64_t>{batch_size});
int* output_len_data = output_len->template mutable_data<int>();
for (int32_t i = 0; i < batch_size; ++i) {
@ -54,7 +54,7 @@ bool CTCGreedyDecoderOp<CPUContext>::RunOnDevice() {
}
int32_t values_cach_size = values_cach.size();
values->Resize(vector<TIndex>{values_cach_size});
values->Resize(vector<int64_t>{values_cach_size});
int* values_data = values->mutable_data<int>();
for (int i = 0; i < values_cach.size(); ++i) {
values_data[i] = values_cach.at(i);

View File

@ -155,7 +155,7 @@ void TreeWalker::advance() {
cursor_.it.advance(lengths_, cursor_.offsets, sizes_, limits_, 1);
}
std::vector<TIndex> TreeWalker::fieldDim(int fieldId) const {
std::vector<int64_t> TreeWalker::fieldDim(int fieldId) const {
auto tensorDim = input(fieldId).dims();
tensorDim[0] = sizes_[lengthIdx(fieldId)];
return tensorDim;
@ -355,7 +355,7 @@ class UnPackRecordsOp : public Operator<CPUContext> {
auto numTensors = OutputSize();
// Precompute the output sizes to avoid resizing
std::vector<std::vector<TIndex>> outputDims(numTensors);
std::vector<std::vector<int64_t>> outputDims(numTensors);
std::vector<const TypeMeta*> metas(numTensors);
CAFFE_ENFORCE(
@ -414,7 +414,7 @@ class UnPackRecordsOp : public Operator<CPUContext> {
private:
void getShapeAndMetaFromInput(
std::vector<std::vector<TIndex>>& outputDims,
std::vector<std::vector<int64_t>>& outputDims,
std::vector<const TypeMeta*>& metas) {
const auto* inputs = Input(0).template data<SharedTensorVectorPtr>();
@ -434,7 +434,7 @@ class UnPackRecordsOp : public Operator<CPUContext> {
}
void getShapeAndMetaFromPrototypeBlobs(
std::vector<std::vector<TIndex>>& outputDims,
std::vector<std::vector<int64_t>>& outputDims,
std::vector<const TypeMeta*>& metas) {
const auto numTensors = fields_.size();
CAFFE_ENFORCE_EQ(numTensors, InputSize() - 1);
@ -501,7 +501,7 @@ class ReadNextBatchOp : public Operator<CPUContext> {
}
}
// gather data
std::vector<TIndex> outDim;
std::vector<int64_t> outDim;
for (int i = 0; i < cursor->it.fields().size(); ++i) {
auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1;
auto size = sizes[lengthIdx];
@ -676,7 +676,7 @@ class ReadRandomBatchOp : public Operator<CPUContext> {
auto idxvec = idxblob.template data<int64_t>();
auto& offsetdim = offsetsmat.dims();
// gather data
std::vector<TIndex> outDim;
std::vector<int64_t> outDim;
int64_t idx;
{
std::lock_guard<std::mutex> lock(cursor->mutex_);
@ -883,7 +883,7 @@ class ConcatTensorVectorOp final : public Operator<Context> {
auto* tensor = Output(TENSOR);
CAFFE_ENFORCE(!tensorVector->empty());
vector<TIndex> outputDims(tensorVector->at(0).dims());
vector<int64_t> outputDims(tensorVector->at(0).dims());
CAFFE_ENFORCE(outputDims.size() > 0);
for (int i = 1; i < tensorVector->size(); i++) {
// the tensor shapes are the same except for the first dimension
@ -895,7 +895,7 @@ class ConcatTensorVectorOp final : public Operator<Context> {
}
tensor->Resize(outputDims);
TIndex offset = 0;
int64_t offset = 0;
auto* dst = (char*)tensor->raw_mutable_data(tensorVector->at(0).meta());
for (const auto& t : *tensorVector) {

View File

@ -123,7 +123,7 @@ class TreeWalker {
return prevOffsets_[lengthIdx(fieldId)];
}
std::vector<TIndex> fieldDim(int fieldId) const;
std::vector<int64_t> fieldDim(int fieldId) const;
void* fieldPtr(int fieldId) const;
@ -134,12 +134,12 @@ class TreeWalker {
Field(TreeWalker& walker, int fieldId)
: walker_(walker), fieldId_(fieldId) {}
inline std::vector<TIndex> dim() const {
inline std::vector<int64_t> dim() const {
return walker_.fieldDim(fieldId_);
}
inline TIndex size() const {
TIndex size = 1;
inline int64_t size() const {
int64_t size = 1;
for (const auto d : dim()) {
size *= d;
}

View File

@ -67,8 +67,8 @@
namespace caffe2 {
typedef TIndex index_t;
typedef std::vector<TIndex> TShape;
typedef int64_t index_t;
typedef std::vector<int64_t> TShape;
template <typename DType>
__device__ DType deformable_im2col_bilinear(
@ -304,8 +304,8 @@ template <typename DType, typename Context>
void DeformConvOpBase<DType, Context>::DeformableIm2col(
const DType* data_im,
const DType* data_offset,
const std::vector<TIndex>& im_shape,
const std::vector<TIndex>& col_shape,
const std::vector<int64_t>& im_shape,
const std::vector<int64_t>& col_shape,
DType* data_col) {
CHECK_LT(2, CAFFE_CUDA_NUM_THREADS);
CAFFE_ENFORCE_EQ(pad_t(), pad_b());
@ -430,8 +430,8 @@ template <typename DType, typename Context>
void DeformConvOpBase<DType, Context>::DeformableCol2im(
const DType* data_col,
const DType* data_offset,
const std::vector<TIndex>& im_shape,
const std::vector<TIndex>& col_shape,
const std::vector<int64_t>& im_shape,
const std::vector<int64_t>& col_shape,
DType* grad_im) {
CAFFE_ENFORCE_EQ(pad_t(), pad_b());
CAFFE_ENFORCE_EQ(pad_l(), pad_r());
@ -577,8 +577,8 @@ void DeformConvOpBase<DType, Context>::DeformableCol2imCoord(
const DType* data_col,
const DType* data_im,
const DType* data_offset,
const std::vector<TIndex>& im_shape,
const std::vector<TIndex>& col_shape,
const std::vector<int64_t>& im_shape,
const std::vector<int64_t>& col_shape,
DType* grad_offset) {
CAFFE_ENFORCE_EQ(pad_t(), pad_b());
CAFFE_ENFORCE_EQ(pad_l(), pad_r());

Some files were not shown because too many files have changed in this diff.