mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)

commit 9ec0a2aef4 (parent 290d20b094)
fbshipit-source-id: ba600fcd2b5cefc7621357bdeb05e24cea02e5af

.gitattributes (vendored): 5 lines deleted
@@ -1,5 +0,0 @@
-# Set the default behavior, in case people don't have core.autocrlf set.
-* text=auto
-
-# BASH scripts shouldn't be converted since they may need to be used by Docker
-*.sh text eol=lf
@@ -41,13 +41,10 @@ std::tuple<Tensor,Tensor> adaptive_max_pool1d(const Tensor & self, IntList outpu
   return std::make_tuple(output.squeeze(2), indices.squeeze(2));
 }
 
-std::tuple<Tensor, Tensor> max_pool1d_with_indices(
-    const Tensor& self,
-    IntList kernel_size,
-    IntList stride,
-    IntList padding,
-    IntList dilation,
-    bool ceil_mode) {
+std::tuple<Tensor,Tensor> max_pool1d(
+    const Tensor & self, IntList kernel_size, IntList stride, IntList padding,
+    IntList dilation, bool ceil_mode) {
   if (stride.empty()) {
     stride = kernel_size;
   }

@@ -58,7 +55,7 @@ std::tuple<Tensor, Tensor> max_pool1d_with_indices(
   check1d("max_pool1d", "dilation", dilation);
 
   Tensor output, indices;
-  std::tie(output, indices) = at::max_pool2d_with_indices(
+  std::tie(output, indices) = at::max_pool2d(
       self.unsqueeze(2),
       {1, kernel_size[0]},
       {1, stride[0]},

@@ -94,41 +91,5 @@ Tensor avg_pool1d(
 
   return output.squeeze(2);
 }
 
-Tensor max_pool1d(
-    const Tensor& self,
-    IntList kernel_size,
-    IntList stride,
-    IntList padding,
-    IntList dilation,
-    bool ceil_mode) {
-  auto output_and_indices = at::max_pool1d_with_indices(
-      self, kernel_size, stride, padding, dilation, ceil_mode);
-  return std::get<0>(output_and_indices);
-}
-
-Tensor max_pool2d(
-    const Tensor& self,
-    IntList kernel_size,
-    IntList stride,
-    IntList padding,
-    IntList dilation,
-    bool ceil_mode) {
-  auto output_and_indices = at::max_pool2d_with_indices(
-      self, kernel_size, stride, padding, dilation, ceil_mode);
-  return std::get<0>(output_and_indices);
-}
-
-Tensor max_pool3d(
-    const Tensor& self,
-    IntList kernel_size,
-    IntList stride,
-    IntList padding,
-    IntList dilation,
-    bool ceil_mode) {
-  auto output_and_indices = at::max_pool3d_with_indices(
-      self, kernel_size, stride, padding, dilation, ceil_mode);
-  return std::get<0>(output_and_indices);
-}
-
 } // namespace native
 } // namespace at
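Taken together, the three hunks above make max_pool1d itself return both the pooled values and the argmax indices again, and drop the value-only max_pool1d/2d/3d wrappers over the _with_indices variants. A minimal caller sketch against this revision; the at::CPU(at::kFloat) factory call is an era-appropriate assumption, not taken from the diff:

#include <ATen/ATen.h>
#include <tuple>

int main() {
  // 1 batch, 4 channels, length 8; CPU float tensor via the old Type API.
  at::Tensor input = at::CPU(at::kFloat).ones({1, 4, 8});
  at::Tensor output, indices;
  // After this commit, max_pool1d returns (values, indices) directly.
  std::tie(output, indices) = at::max_pool1d(
      input, /*kernel_size=*/{2}, /*stride=*/{2}, /*padding=*/{0},
      /*dilation=*/{1}, /*ceil_mode=*/false);
  return output.size(2) == 4 ? 0 : 1;  // length 8 pooled at stride 2 -> 4
}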
@@ -817,16 +817,7 @@
 
 - func: max_values(Tensor self, int64_t dim, bool keepdim=false) -> Tensor
 
-- func: max_pool1d_with_indices(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> (Tensor, Tensor)
-  variants: function
-
-- func: max_pool1d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> Tensor
-  variants: function
-
-- func: max_pool2d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> Tensor
-  variants: function
-
-- func: max_pool3d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> Tensor
+- func: max_pool1d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> (Tensor, Tensor)
   variants: function
 
 # FIXME: These could be combined as optional<ScalarType> but for https://github.com/pytorch/pytorch/issues/6593.
@@ -149,12 +149,12 @@
   scalar_check:
     output: 'false'
 
-- name: max_pool2d_with_indices(Tensor self, IntList[2] kernel_size, IntList[2] stride={}, IntList[2] padding=0, IntList[2] dilation=1, bool ceil_mode=false)
+- name: max_pool2d(Tensor self, IntList[2] kernel_size, IntList[2] stride={}, IntList[2] padding=0, IntList[2] dilation=1, bool ceil_mode=false)
   cname: SpatialDilatedMaxPooling
   default_init:
     stride: kernel_size
 
-- name: max_pool3d_with_indices(Tensor self, IntList[3] kernel_size, IntList[3] stride={}, IntList[3] padding=0, IntList[3] dilation=1, bool ceil_mode=false)
+- name: max_pool3d(Tensor self, IntList[3] kernel_size, IntList[3] stride={}, IntList[3] padding=0, IntList[3] dilation=1, bool ceil_mode=false)
   cname: VolumetricDilatedMaxPooling
   default_init:
     stride: kernel_size
@@ -521,7 +521,7 @@ TEST(TensorTest, TensorNonFundamentalType) {
   }
 }
 
-TEST(TensorTest, TensorNonFundamentalTypeCopy) {
+TEST(TensorTest, TensorNonFundamentalTypeClone) {
   TensorCPU tensor(vector<int>{2, 3, 4});
   std::string* ptr = tensor.mutable_data<std::string>();
   EXPECT_TRUE(ptr != nullptr);

@@ -529,11 +529,20 @@ TEST(TensorTest, TensorNonFundamentalTypeCopy) {
     EXPECT_TRUE(ptr[i] == "");
     ptr[i] = "filled";
   }
-  TensorCPU dst_tensor(tensor);
+  TensorCPU dst_tensor = tensor.Clone();
   const std::string* dst_ptr = dst_tensor.data<std::string>();
   for (int i = 0; i < dst_tensor.size(); ++i) {
     EXPECT_TRUE(dst_ptr[i] == "filled");
   }
+  // Change the original tensor
+  for (int i = 0; i < tensor.size(); ++i) {
+    EXPECT_TRUE(ptr[i] == "filled");
+    ptr[i] = "changed";
+  }
+  // Confirm that the cloned tensor is not affected
+  for (int i = 0; i < dst_tensor.size(); ++i) {
+    EXPECT_TRUE(dst_ptr[i] == "filled");
+  }
 }
 
 TEST(TensorTest, Tensor64BitDimension) {

@@ -1060,5 +1069,47 @@ TEST(BlobTest, CastingMessage) {
   }
 }
 
+TEST(TensorConstruction, UnitializedCopyTest) {
+  CPUContext context;
+  TensorCPU x;
+  TensorCPU y(x, &context);
+  TensorCPU z = x.Clone();
+  // should be uninitialized
+  EXPECT_EQ(x.size(), -1);
+  EXPECT_EQ(y.size(), -1);
+  LOG(INFO) << "z.size()" << z.size();
+  EXPECT_EQ(z.size(), -1);
+}
+
+TEST(TensorConstruction, CopyConstructorTest) {
+  CPUContext context;
+
+  TensorCPU x;
+  x.Resize(5);
+  x.mutable_data<float>()[0] = 1;
+  TensorCPU y = x.Clone();
+  TensorCPU z(x, &context);
+  TensorCPU w;
+
+  EXPECT_EQ(*x.data<float>(), 1);
+  EXPECT_EQ(*y.data<float>(), 1);
+  EXPECT_EQ(*z.data<float>(), 1);
+  x.mutable_data<float>()[0] = 5;
+  EXPECT_EQ(*x.data<float>(), 5);
+  EXPECT_EQ(*y.data<float>(), 1);
+  EXPECT_EQ(*z.data<float>(), 1);
+}
+
+TEST(TensorConstruction, MoveConstructorTest) {
+  CPUContext context;
+
+  TensorCPU x;
+  x.Resize(5);
+  x.mutable_data<float>()[0] = 1;
+  TensorCPU y = std::move(x);
+
+  EXPECT_EQ(*y.data<float>(), 1);
+}
+
 } // namespace
 } // namespace caffe2
@@ -12,9 +12,9 @@
 #include <cuda.h>
 #endif
 
-#if (!defined(__CUDACC__) || CUDA_VERSION > 9000 ) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
+#if !defined(__CUDACC__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
 #include <glog/stl_logging.h>
-#else // (!defined(__CUDACC__) || CUDA_VERSION > 9000 ) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
+#else // !defined(__CUDACC__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
 
 // here, we need to register a fake overload for vector/string - here,
 // we just ignore the entries in the logs.
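The guard previously admitted nvcc builds with CUDA newer than 9.0; it now excludes nvcc outright, so <glog/stl_logging.h> is only ever seen by host compilers. For reference, what that header enables on the host side (a hedged sketch, requires a glog build):

#include <glog/logging.h>
#include <glog/stl_logging.h>  // adds operator<< overloads for STL containers
#include <vector>

int main(int argc, char** argv) {
  (void)argc;
  google::InitGoogleLogging(argv[0]);
  std::vector<int> v{1, 2, 3};
  LOG(INFO) << v;  // stl_logging prints the elements, e.g. "1 2 3"
  return 0;
}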
@@ -555,13 +555,32 @@ class GivenTensorFill : public NeuralNetOperator {
 
 class Concat : public NeuralNetOperator {
  public:
-  Concat() : NeuralNetOperator(NNKind::Concat) {}
+  Concat(int axis = -1, bool addAxis = false)
+      : NeuralNetOperator(NNKind::Concat), Axis(axis), AddAxis(addAxis) {}
 
   ~Concat() {}
 
   NOMNIGRAPH_DEFINE_NN_RTTI(Concat);
 
+  int getAxis() const {
+    return Axis;
+  }
+
+  bool getAddAxis() const {
+    return AddAxis;
+  }
+
+  void setAxis(int axis) {
+    Axis = axis;
+  }
+
+  void setAddAxis(bool addAxis) {
+    AddAxis = addAxis;
+  }
+
  private:
+  int Axis;
+  bool AddAxis;
 };
 
 class Softmax : public NeuralNetOperator {
@@ -908,3 +927,68 @@ class Int8MaxPoolRelu : public NeuralNetOperator {
 
  private:
 };
 
+class BatchMatMul : public NeuralNetOperator {
+ public:
+  BatchMatMul(bool transA = false, bool transB = true, bool broadcast = false)
+      : NeuralNetOperator(NNKind::BatchMatMul),
+        TransA(transA),
+        TransB(transB),
+        Broadcast(broadcast) {}
+
+  ~BatchMatMul() {}
+
+  NOMNIGRAPH_DEFINE_NN_RTTI(BatchMatMul);
+
+  bool getTransA() const {
+    return TransA;
+  }
+
+  bool getTransB() const {
+    return TransB;
+  }
+
+  bool getBroadcast() const {
+    return Broadcast;
+  }
+
+  void setTransA(bool transA) {
+    TransA = transA;
+  }
+
+  void setTransB(bool transB) {
+    TransB = transB;
+  }
+
+  void setBroadcast(bool broadcast) {
+    Broadcast = broadcast;
+  }
+
+ private:
+  bool TransA;
+  bool TransB;
+  bool Broadcast;
+};
+
+class BatchGather : public NeuralNetOperator {
+ public:
+  BatchGather() : NeuralNetOperator(NNKind::BatchGather) {}
+
+  ~BatchGather() {}
+
+  NOMNIGRAPH_DEFINE_NN_RTTI(BatchGather);
+
+ private:
+};
+
+class ConcatBatchMatMulBatchGatherOp : public NeuralNetOperator {
+ public:
+  ConcatBatchMatMulBatchGatherOp()
+      : NeuralNetOperator(NNKind::ConcatBatchMatMulBatchGatherOp) {}
+
+  ~ConcatBatchMatMulBatchGatherOp() {}
+
+  NOMNIGRAPH_DEFINE_NN_RTTI(ConcatBatchMatMulBatchGatherOp);
+
+ private:
+};
@@ -5,4 +5,5 @@ Relu, Conv, ConvRelu, ConvTranspose, AveragePool, AveragePoolRelu, MaxPool,
 Int8Conv, Int8ConvTranspose, Int8FC, Int8MaxPool, Int8Relu,
 Int8GivenTensorFill, Int8Concat, Int8Softmax, Int8ChannelShuffle, Int8Sum,
 Int8Add, Int8Reshape, Int8Flatten, Int8ConvRelu, Int8SumRelu,
-Int8AveragePoolRelu, Int8MaxPoolRelu
+Int8AveragePoolRelu, Int8MaxPoolRelu, BatchMatMul, BatchGather,
+ConcatBatchMatMulBatchGatherOp
@@ -84,3 +84,9 @@ case NNKind::Int8AveragePoolRelu:
     return "Int8AveragePoolRelu";
   case NNKind::Int8MaxPoolRelu:
     return "Int8MaxPoolRelu";
+  case NNKind::BatchMatMul:
+    return "BatchMatMul";
+  case NNKind::BatchGather:
+    return "BatchGather";
+  case NNKind::ConcatBatchMatMulBatchGatherOp:
+    return "ConcatBatchMatMulBatchGatherOp";
@@ -14,6 +14,24 @@
 #include <functional>
 #include <list>
 
+// These #defines are useful when writing passes as they collapse
+//
+//   if (!cond) {
+//     continue; // or break; or return;
+//   }
+//
+// into a single line without negation
+
+#define NOM_REQUIRE_OR_(_cond, _expr) \
+  if (!(_cond)) {                     \
+    _expr;                            \
+  }
+
+#define NOM_REQUIRE_OR_CONT(_cond) NOM_REQUIRE_OR_(_cond, continue)
+#define NOM_REQUIRE_OR_BREAK(_cond) NOM_REQUIRE_OR_(_cond, break)
+#define NOM_REQUIRE_OR_RET_NULL(_cond) NOM_REQUIRE_OR_(_cond, return nullptr)
+#define NOM_REQUIRE_OR_RET(_cond) NOM_REQUIRE_OR_(_cond, return )
+
 // Implements accessors for a generic type T. If the type is not
 // specified (i.e., void template type) then the partial specification
 // gives an empty type.
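A runnable illustration of what the new macros buy; only the macro definitions below are taken from the hunk above, the rest is a standalone sketch:

#include <cstdio>
#include <vector>

#define NOM_REQUIRE_OR_(_cond, _expr) \
  if (!(_cond)) {                     \
    _expr;                            \
  }
#define NOM_REQUIRE_OR_CONT(_cond) NOM_REQUIRE_OR_(_cond, continue)

int main() {
  std::vector<int> values{1, -2, 3, -4};
  for (int v : values) {
    // Expands to: if (!(v > 0)) { continue; }
    NOM_REQUIRE_OR_CONT(v > 0);
    std::printf("%d\n", v);  // only positive entries reach this line
  }
  return 0;
}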
@@ -55,6 +55,8 @@ BatchNormalization
 FC
 GivenTensorFill
 Concat
+- Axis : int : -1
+- AddAxis : bool : false
 Softmax
 ChannelShuffle
 Add
@@ -84,3 +86,10 @@ Int8ConvRelu : ConvRelu
 Int8SumRelu : SumRelu
 Int8AveragePoolRelu : AveragePoolRelu
 Int8MaxPoolRelu : MaxPoolRelu
+
+BatchMatMul
+- TransA : bool : false
+- TransB : bool : true
+- Broadcast : bool : false
+BatchGather
+ConcatBatchMatMulBatchGatherOp
@@ -124,7 +124,7 @@ struct WorkspaceIdInjector {
   void InjectWorkspaceId(Workspace* workspace) {
     if (workspace->HasBlob(NODE_ID)) {
       Blob* node_id_blob = workspace->GetBlob(NODE_ID);
-      TensorCPU node_id_tensor = node_id_blob->template Get<TensorCPU>();
+      const TensorCPU& node_id_tensor = node_id_blob->template Get<TensorCPU>();
       int node_id = node_id_tensor.template data<int32_t>()[0];
       CAFFE_ENFORCE(
           seq_ < (1 << 16),
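This const-reference binding (the same fix recurs in the elementwise logic tests further down) is forced by the copy-constructor deletion later in this diff: Blob::Get<T>() returns a const reference, and copy-constructing a TensorCPU from it no longer compiles. A standalone sketch of the pattern, with hypothetical stand-in types rather than the caffe2 API:

#include <cassert>

struct Payload {
  int value = 7;
  Payload() = default;
  Payload(const Payload&) = delete;  // mirrors the deleted Tensor copy ctor
};

struct Blob {
  Payload payload;
  const Payload& Get() const { return payload; }  // const ref, like Blob::Get<T>()
};

int main() {
  Blob blob;
  // Payload p(blob.Get());    // would not compile: copy constructor deleted
  const auto& p = blob.Get();  // binds a reference instead; no copy made
  assert(p.value == 7);
  return 0;
}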
@@ -168,6 +168,15 @@ class Tensor {
       return;
     }
     meta_ = src.meta();
+    if (src.size() == -1) {
+      dims_.clear();
+      size_ = -1;
+      data_.reset();
+      shares_data_ = false;
+      capacity_ = 0;
+      reserved_ = false;
+      return;
+    }
     Resize(src.dims());
     if (size() > 0) {
       if (meta_.copy()) {
@@ -681,6 +690,21 @@ class Tensor {
     return dims_[i];
   }
 
+  Tensor Clone() const {
+    Tensor x;
+    x.CopyFrom(*this);
+    return x;
+  }
+
+  Tensor(Tensor<Context>&& src) noexcept {
+    swap(src);
+  }
+
+  /**
+   * @brief Delete the copy constructor and use Clone explicitly
+   */
+  Tensor(const Tensor<Context>& src) = delete;
+
  protected:
   vector<TIndex> dims_;
   TIndex size_ = -1;
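The shape of the new ownership contract, reduced to a standalone sketch; this illustrates the pattern only and is not the real caffe2::Tensor:

#include <utility>
#include <vector>

struct Buf {
  std::vector<float> data;

  Buf() = default;
  Buf(const Buf&) = delete;                  // copies must be explicit
  Buf(Buf&& src) noexcept { std::swap(data, src.data); }

  Buf Clone() const {
    Buf x;
    x.data = data;                           // deep copy of the payload
    return x;                                // returned via move, no copy ctor needed
  }
};

int main() {
  Buf a;
  a.data = {1.0f, 2.0f};
  Buf b = a.Clone();                         // ok: explicit deep copy
  Buf c = std::move(a);                      // ok: cheap move, like Tensor(Tensor&&)
  // Buf d = b;                              // would not compile: copy is deleted
  return (b.data.size() == 2 && c.data.size() == 2) ? 0 : 1;
}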
@@ -27,7 +27,9 @@ class CaffeTypeId final : public c10::guts::IdWrapper<CaffeTypeId, uint16_t> {
 public:
   static CaffeTypeId createTypeId();
 
-  friend std::ostream& operator<<(std::ostream& stream, CaffeTypeId typeId);
+  friend std::ostream& operator<<(std::ostream& stream, CaffeTypeId typeId) {
+    return stream << typeId.underlyingId();
+  }
   friend bool operator<(CaffeTypeId lhs, CaffeTypeId rhs);
 
   // TODO Can we get rid of uninitialized?

@@ -39,10 +41,6 @@ private:
   constexpr explicit CaffeTypeId(uint16_t id): IdWrapper(id) {}
 };
 
-inline std::ostream& operator<<(std::ostream& stream, CaffeTypeId typeId) {
-  return stream << typeId.underlyingId();
-}
-
 // Allow usage in std::map / std::set
 // TODO Disallow this and rather use std::unordered_map/set everywhere
 inline bool operator<(CaffeTypeId lhs, CaffeTypeId rhs) {
@@ -1,3 +1,19 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "caffe2/experiments/operators/fully_connected_op_decomposition.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_FULLY_CONNECTED_OP_DECOMPOSITION_H_
 #define CAFFE2_OPERATORS_FULLY_CONNECTED_OP_DECOMPOSITION_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/core/context_gpu.h"
 #include "caffe2/experiments/operators/fully_connected_op_decomposition.h"
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/fully_connected_op_prune.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_FULLY_CONNECTED_OP_PRUNE_H_
 #define CAFFE2_OPERATORS_FULLY_CONNECTED_OP_PRUNE_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/fully_connected_op_sparse.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_FULLY_CONNECTED_OP_SPARSE_H_
 #define CAFFE2_OPERATORS_FULLY_CONNECTED_OP_SPARSE_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/funhash_op.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_FUNHASH_OP_H_
 #define CAFFE2_OPERATORS_FUNHASH_OP_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/sparse_funhash_op.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_SPARSE_FUNHASH_OP_H_
 #define CAFFE2_OPERATORS_SPARSE_FUNHASH_OP_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/sparse_matrix_reshape_op.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_SPARSE_MATRIX_RESHAPE_H_
 #define CAFFE2_OPERATORS_SPARSE_MATRIX_RESHAPE_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/tt_contraction_op.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_TT_CONTRACTION_OP_H_
 #define CAFFE2_OPERATORS_TT_CONTRACTION_OP_H_
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/core/context_gpu.h"
 #include "caffe2/experiments/operators/tt_contraction_op.h"
 

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #include "caffe2/experiments/operators/tt_pad_op.h"
 
 namespace caffe2 {

@@ -1,3 +1,19 @@
+/* [Apache 2.0 license header identical to the one above] */
+
 #ifndef CAFFE2_OPERATORS_TT_PAD_OP_H_
 #define CAFFE2_OPERATORS_TT_PAD_OP_H_
 

@@ -1,3 +1,18 @@
+# Copyright (c) 2016-present, Facebook, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+
 ## @package SparseTransformer
 # Module caffe2.experiments.python.SparseTransformer
 from __future__ import absolute_import

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 ## @package convnet_benchmarks
 # Module caffe2.experiments.python.convnet_benchmarks
 from __future__ import absolute_import

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 ## @package device_reduce_sum_bench
 # Module caffe2.experiments.python.device_reduce_sum_bench
 from __future__ import absolute_import

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 ## @package net_construct_bench
 # Module caffe2.experiments.python.net_construct_bench
 from __future__ import absolute_import

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

@@ -1,3 +1,18 @@
+# [Apache 2.0 license header identical to the one above]
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -1,15 +0,0 @@
-# Android makefile
-# Build this using ndk as
-# ndk-build NDK_PROJECT_PATH=. APP_BUILD_SCRIPT=Android.mk
-#
-
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-LOCAL_MODULE := libOpenCL
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/include/
-LOCAL_SRC_FILES := src/libopencl.c
-LOCAL_CFLAGS = -fPIC -O2
-
-include $(BUILD_STATIC_LIBRARY)
-
@@ -286,7 +286,8 @@ std::unique_ptr<QConvState> create2b1bConvState(Workspace* ws,
 #endif
   };
   if (b) {
-    state->bias = caffe2::make_unique<TensorCPU>(*b);
+    CPUContext context;
+    state->bias = caffe2::make_unique<TensorCPU>(*b, &context);
   }
   return state;
 }
@@ -78,7 +78,7 @@ Y: [0.3005476 1.551666 1.3591481 0.39191285 0.21866608]
 </details>
 
 )DOC")
-    .Input(0, "X", "*(type: Tensor<float\>)* Input tensor.")
+    .Input(0, "X", "*(type: Tensor<float\\>)* Input tensor.")
     .Output(
         0,
         "Y",
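For context on this one-character fix: the .Input(...) call sits outside the raw )DOC" string, so it takes an ordinary C++ string literal, where a bare \> is an invalid escape sequence; \\> is required to emit a literal backslash before the angle bracket. A minimal demonstration:

#include <cstdio>

int main() {
  // Prints: *(type: Tensor<float\>)* Input tensor.
  std::printf("%s\n", "*(type: Tensor<float\\>)* Input tensor.");
  return 0;
}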
@@ -59,7 +59,7 @@ void elementwiseAnd() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), N);
   std::vector<bool> result{true, false, false, false};
   for (size_t i = 0; i < Z.size(); ++i) {

@@ -79,7 +79,7 @@ void elementwiseAnd() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), M * N);
   std::vector<bool> result{
       true, false, false, false, true, false, false, false};

@@ -105,7 +105,7 @@ void elementwiseOr() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), N);
   std::vector<bool> result{true, true, true, false};
   for (size_t i = 0; i < Z.size(); ++i) {

@@ -125,7 +125,7 @@ void elementwiseOr() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), M * N);
   std::vector<bool> result{true, true, true, false, true, true, true, false};
   for (size_t i = 0; i < Z.size(); ++i) {

@@ -150,7 +150,7 @@ void elementwiseXor() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), N);
   std::vector<bool> result{false, true, true, false};
   for (size_t i = 0; i < Z.size(); ++i) {

@@ -170,7 +170,7 @@ void elementwiseXor() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), M * N);
   std::vector<bool> result{
       false, true, true, false, false, true, true, false};

@@ -195,7 +195,7 @@ void elementwiseNot() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Y");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Y(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Y = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Y.size(), N);
   std::vector<bool> result{false, true};
   for (size_t i = 0; i < Y.size(); ++i) {

@@ -217,7 +217,7 @@ void elementwiseEQ() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), N);
   std::vector<bool> result{false, true, false, true};
   for (size_t i = 0; i < Z.size(); ++i) {

@@ -234,7 +234,7 @@ void elementwiseEQ() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), N);
   std::vector<bool> result{true, true, false, false};
   for (size_t i = 0; i < Z.size(); ++i) {

@@ -253,7 +253,7 @@ void elementwiseEQ() {
   EXPECT_TRUE(op->Run());
   auto* blob = ws.GetBlob("Z");
   EXPECT_NE(nullptr, blob);
-  caffe2::TensorCPU Z(blob->Get<caffe2::Tensor<Context>>());
+  const auto& Z = blob->Get<caffe2::Tensor<Context>>();
   EXPECT_EQ(Z.size(), M * N);
   std::vector<bool> result{
       true, false, false, true, false, true, true, false};
@@ -296,13 +296,14 @@ bool SumElementsGradientOp<T, Context>::RunOnDevice()
 #endif
 {
   auto& X = Input(0);
-  TensorCPU sum_grad = TensorCPU(Input(1));
+  const auto& sum_grad = Input(1);
   auto* dX = Output(0);
   dX->ResizeLike(X);
   DCHECK_EQ(sum_grad.size(), 1);
   math::Set<T, Context>(
       dX->size(),
-      static_cast<T>(sum_grad.data<T>()[0] * (average_ ? 1.0 / X.size() : 1)),
+      static_cast<T>(
+          sum_grad.template data<T>()[0] * (average_ ? 1.0 / X.size() : 1)),
       dX->template mutable_data<T>(),
       &context_);
   return true;
@@ -385,7 +385,7 @@ lengths_out: [5]
     .Output(
         0,
         "data_out",
-        "*(type: Tensor)* Padded data tensor ($T<N + 2*padding\_width, "
+        "*(type: Tensor)* Padded data tensor ($T<N + 2*padding\\_width, "
         "D_1, ..., D_n>$).")
     .Output(
         1,

@@ -483,7 +483,7 @@ lengths_out_rm: [3]
         0,
         "data_out",
         "*(type: Tensor)* Padded data tensor "
-        "($T<N + 2*padding\_width, D_1, ..., D_n>$).")
+        "($T<N + 2*padding\\_width, D_1, ..., D_n>$).")
     .Output(
         1,
         "lengths_out",
@@ -128,6 +128,49 @@ convertToNeuralNetOperator(caffe2::OperatorDef* op) {
     nnOp = util::make_unique<repr::BatchNormalization>();
   }
 
+  if (op->type() == "Concat") {
+    nnOp = util::make_unique<repr::Concat>();
+    auto c = dyn_cast<repr::Concat>(nnOp.get());
+    if (argMap.count("axis")) {
+      CAFFE_ENFORCE(argMap["axis"].has_i(), "Invalid axis argument");
+      int axis = static_cast<int>(argMap["axis"].i());
+      c->setAxis(axis);
+    }
+    if (argMap.count("add_axis")) {
+      CAFFE_ENFORCE(argMap["add_axis"].has_i(), "Invalid add_axis argument");
+      int add_axis = static_cast<int>(argMap["add_axis"].i());
+      c->setAddAxis(!!add_axis);
+    }
+  }
+
+  if (op->type() == "Flatten") {
+    nnOp = util::make_unique<repr::Flatten>();
+  }
+
+  if (op->type() == "BatchGather") {
+    nnOp = util::make_unique<repr::BatchGather>();
+  }
+
+  if (op->type() == "BatchMatMul") {
+    nnOp = util::make_unique<repr::BatchMatMul>();
+    auto c = dyn_cast<repr::BatchMatMul>(nnOp.get());
+    if (argMap.count("trans_a")) {
+      CAFFE_ENFORCE(argMap["trans_a"].has_i(), "Invalid trans_a argument");
+      int trans_a = static_cast<int>(argMap["trans_a"].i());
+      c->setTransA(!!trans_a);
+    }
+    if (argMap.count("trans_b")) {
+      CAFFE_ENFORCE(argMap["trans_b"].has_i(), "Invalid trans_b argument");
+      int trans_b = static_cast<int>(argMap["trans_b"].i());
+      c->setTransB(!!trans_b);
+    }
+    if (argMap.count("broadcast")) {
+      CAFFE_ENFORCE(argMap["broadcast"].has_i(), "Invalid broadcast argument");
+      int broadcast = static_cast<int>(argMap["broadcast"].i());
+      c->setBroadcast(!!broadcast);
+    }
+  }
+
   if (!nnOp) {
     nnOp = util::make_unique<repr::GenericOperator>(op->type());
   }
@@ -25,7 +25,7 @@ class OptimizationPass {
  public:
   OptimizationPass(NNModule* nn) : nn_(nn) {}
   virtual void run() = 0;
-  virtual ~OptimizationPass() = 0;
+  virtual ~OptimizationPass() {}
 
  protected:
   NNModule* nn_;
@@ -34,6 +34,7 @@ class OptimizationPass {
 class WorkspaceOptimizationPass : public OptimizationPass {
  public:
   WorkspaceOptimizationPass(NNModule* nn, Workspace* ws) : OptimizationPass(nn), ws_(ws) {}
+  virtual ~WorkspaceOptimizationPass() {}
 
  protected:
   Workspace* ws_;
@@ -42,26 +43,28 @@ class WorkspaceOptimizationPass : public OptimizationPass {
 CAFFE_DECLARE_REGISTRY(WorkspaceOptimizationPassRegistry, WorkspaceOptimizationPass, NNModule*, Workspace*);
 #define REGISTER_WS_OPT_PASS(clsname) \
   CAFFE_REGISTER_CLASS(WorkspaceOptimizationPassRegistry, clsname, clsname)
 #define REGISTER_WS_OPT_PASS_FROM_FUNC(passname, funcname) \
   class passname : public WorkspaceOptimizationPass { \
    public: \
     using WorkspaceOptimizationPass::WorkspaceOptimizationPass; \
     void run() override { \
       funcname(nn_, ws_); \
     } \
-  };
+  }; \
+  REGISTER_WS_OPT_PASS(passname);
 
 CAFFE_DECLARE_REGISTRY(OptimizationPassRegistry, OptimizationPass, NNModule*);
 #define REGISTER_OPT_PASS(clsname) \
   CAFFE_REGISTER_CLASS(OptimizationPassRegistry, clsname, clsname)
 #define REGISTER_OPT_PASS_FROM_FUNC(passname, funcname) \
   class passname : public OptimizationPass { \
    public: \
     using OptimizationPass::OptimizationPass; \
     void run() override { \
       funcname(nn_); \
     } \
-  };
+  }; \
+  REGISTER_OPT_PASS(passname);
 
 } // namespace caffe2
@@ -91,6 +91,7 @@ class LayerModelHelper(model_helper.ModelHelper):
         # additional (hard-coded) diagnose_options to report based on the model
         # TODO(xlwang): it's a hack!
         self.ad_hoc_diagnose_blobs_and_operations = []
+        self.ad_hoc_plot_blobs = []
 
     def clear_output_schema(self):
         self._output_schema = None
@@ -105,6 +106,11 @@ class LayerModelHelper(model_helper.ModelHelper):
             (name, value)
         )
 
+    def add_ad_hoc_plot_blob(self, blob, dtype=None):
+        dtype = dtype or (np.float, (1, ))
+        self.add_metric_field(str(blob), schema.Scalar(dtype, blob))
+        self.ad_hoc_plot_blobs.append(blob)
+
     @staticmethod
     def _get_global_constant_initializer_op(
        blob_name, array=None, dtype=None, initializer=None
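For orientation, a minimal usage sketch of the new plotting hook (illustrative only; `model`, `train_net`, and `loss_blob` are hypothetical stand-ins, not part of the diff):

    from caffe2.python import workspace

    # Register a blob so it is tracked for ad hoc plotting; per the diff this
    # also adds a metric field for the blob via add_metric_field().
    model.add_ad_hoc_plot_blob(loss_blob)

    workspace.RunNetOnce(train_net)
    # Every registered blob can then be fetched back for plotting/diagnosis.
    values = [workspace.FetchBlob(b) for b in model.ad_hoc_plot_blobs]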
@@ -22,7 +22,9 @@ class AdaptiveWeight(ModelLayer):
         optimizer=None,
         weights=None,
         enable_diagnose=False,
-        estimation_method=None,
+        estimation_method="log_std",
+        pos_optim_method="log_barrier",
+        reg_lambda=0.1,
         **kwargs
     ):
         super(AdaptiveWeight, self).__init__(model, name, input_record, **kwargs)
@@ -38,20 +40,23 @@ class AdaptiveWeight(ModelLayer):
             weights = [1. / self.num for _ in range(self.num)]
         assert min(weights) > 0, "initial weights must be positive"
         self.weights = np.array(weights).astype(np.float32)
-        self.estimation_method = estimation_method
-        if self.estimation_method is not None:
-            self.estimation_method_type = infer_thrift_union_selection(
-                estimation_method
-            ).lower()
-            self.estimation_method_value = estimation_method.value
-        else:
-            self.estimation_method_type = "log_std"
-            self.estimation_method_value = None
+        self.estimation_method = str(estimation_method).lower()
+        # used in the positivity-constrained parameterization, i.e. when the
+        # estimation method is inv_var, with the optimization method being
+        # either log barrier or gradient projection
+        self.pos_optim_method = str(pos_optim_method).lower()
+        self.reg_lambda = float(reg_lambda)
         self.enable_diagnose = enable_diagnose
-        self.init_func = getattr(self, self.estimation_method_type + "_init")
-        self.weight_func = getattr(self, self.estimation_method_type + "_weight")
-        self.reg_func = getattr(self, self.estimation_method_type + "_reg")
+        self.init_func = getattr(self, self.estimation_method + "_init")
+        self.weight_func = getattr(self, self.estimation_method + "_weight")
+        self.reg_func = getattr(self, self.estimation_method + "_reg")
         self.init_func()
+        if self.enable_diagnose:
+            self.weight_i = [
+                self.get_next_blob_reference("adaptive_weight_%d" % i)
+                for i in range(self.num)
+            ]
+            for i in range(self.num):
+                self.model.add_ad_hoc_plot_blob(self.weight_i[i])
 
     def concat_data(self, net):
         reshaped = [net.NextScopedBlob("reshaped_data_%d" % i) for i in range(self.num)]
@@ -110,15 +115,15 @@ class AdaptiveWeight(ModelLayer):
             "GivenTensorFill",
             {"values": values, "dtype": core.DataType.FLOAT},
         )
-        pos_optim_method = self.estimation_method_value.pos_optim_method.getType()
-        pos_optim_option = self.estimation_method_value.pos_optim_method.value
-        if pos_optim_method == "LOG_BARRIER":
-            regularizer = LogBarrier(float(reg_lambda=pos_optim_option.reg_lambda))
-        elif pos_optim_method == "POS_GRAD_PROJ":
+        if self.pos_optim_method == "log_barrier":
+            regularizer = LogBarrier(reg_lambda=self.reg_lambda)
+        elif self.pos_optim_method == "pos_grad_proj":
             regularizer = BoundedGradientProjection(lb=0, left_open=True)
         else:
             raise TypeError(
-                "unknown positivity optimization method: {}".format(pos_optim_method)
+                "unknown positivity optimization method: {}".format(
+                    self.pos_optim_method
+                )
             )
         self.k = self.create_param(
             param_name="k",
@@ -136,7 +141,7 @@ class AdaptiveWeight(ModelLayer):
         net.Log(self.k, log_k)
         net.Scale(log_k, reg, scale=-0.5)
 
-    def add_ops(self, net):
+    def _add_ops_impl(self, net, enable_diagnose):
         x = self.concat_data(net)
         weight = net.NextScopedBlob("weight")
         reg = net.NextScopedBlob("reg")
@@ -147,21 +152,9 @@ class AdaptiveWeight(ModelLayer):
         net.Mul([weight, x], weighted_x)
         net.Add([weighted_x, reg], weighted_x_add_reg)
         net.SumElements(weighted_x_add_reg, self.output_schema())
-        if self.enable_diagnose:
+        if enable_diagnose:
             for i in range(self.num):
-                weight_i = net.NextScopedBlob("weight_%d" % i)
-                net.Slice(weight, weight_i, starts=[i], ends=[i + 1])
+                net.Slice(weight, self.weight_i[i], starts=[i], ends=[i + 1])
 
-
-def infer_thrift_union_selection(ttype_union):
-    # TODO(xlwang): this is a hack way to infer the type str of a thrift union
-    # struct
-    assert ttype_union.isUnion(), "type {} is not a thrift union".format(
-        type(ttype_union)
-    )
-    field = ttype_union.field
-    for attr in dir(ttype_union):
-        v = getattr(ttype_union, attr)
-        if isinstance(v, int) and attr != "field" and v == field:
-            return attr
-    raise ValueError("Fail to infer the thrift union type")
+    def add_ops(self, net):
+        self._add_ops_impl(net, self.enable_diagnose)
@@ -1809,25 +1809,50 @@ class TestLayers(LayersTestCase):
     @given(
         num=st.integers(min_value=10, max_value=100),
         feed_weight=st.booleans(),
+        use_inv_var_parameterization=st.booleans(),
+        use_log_barrier=st.booleans(),
+        enable_diagnose=st.booleans(),
         **hu.gcs
     )
-    def testAdaptiveWeight(self, num, feed_weight, gc, dc):
+    def testAdaptiveWeight(
+        self, num, feed_weight, use_inv_var_parameterization, use_log_barrier,
+        enable_diagnose, gc, dc
+    ):
         input_record = self.new_record(schema.RawTuple(num))
         data = np.random.random(num)
         schema.FeedRecord(
-            input_record,
-            [np.array(x).astype(np.float32) for x in data]
+            input_record, [np.array(x).astype(np.float32) for x in data]
         )
         weights = np.random.random(num) if feed_weight else None
-        result = self.model.AdaptiveWeight(input_record, weights=weights)
+        result = self.model.AdaptiveWeight(
+            input_record,
+            weights=weights,
+            estimation_method=(
+                'inv_var' if use_inv_var_parameterization else 'log_std'
+            ),
+            pos_optim_method=(
+                'log_barrier' if use_log_barrier else 'pos_grad_proj'
+            ),
+            enable_diagnose=enable_diagnose
+        )
         train_init_net, train_net = self.get_training_nets(True)
         workspace.RunNetOnce(train_init_net)
         workspace.RunNetOnce(train_net)
         result = workspace.FetchBlob(result())
         if not feed_weight:
-            weights = 1. / num
+            weights = np.array([1. / num for _ in range(num)])
         expected = np.sum(weights * data + 0.5 * np.log(1. / 2. / weights))
         npt.assert_allclose(expected, result, atol=1e-4, rtol=1e-4)
+        if enable_diagnose:
+            assert len(self.model.ad_hoc_plot_blobs) == num
+            reconst_weights_from_ad_hoc = np.array(
+                [workspace.FetchBlob(b) for b in self.model.ad_hoc_plot_blobs]
+            ).flatten()
+            npt.assert_allclose(
+                reconst_weights_from_ad_hoc, weights, atol=1e-4, rtol=1e-4
+            )
+        else:
+            assert len(self.model.ad_hoc_plot_blobs) == 0
 
     @given(num=st.integers(min_value=10, max_value=100), **hu.gcs)
     def testConstantWeight(self, num, gc, dc):
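An aside on the expected value asserted above, under an uncertainty-weighting reading of the layer (our inference from the test; the diff itself does not state it). Writing the learned weight as w_i = 1/(2*sigma_i^2), the summand checked by the test is exactly the usual per-task uncertainty loss:

    \sum_i \Bigl( w_i\,x_i + \tfrac{1}{2}\log\tfrac{1}{2 w_i} \Bigr)
        \;=\; \sum_i \Bigl( \frac{x_i}{2\sigma_i^2} + \log\sigma_i \Bigr),
    \qquad w_i = \frac{1}{2\sigma_i^2}

With the uniform initial weights w_i = 1/num fed above, this reduces to the `expected` expression in the test.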
@@ -21,6 +21,7 @@
 #include "caffe2/opt/fusion.h"
 #include "caffe2/opt/mobile.h"
 #include "caffe2/opt/optimize_ideep.h"
+#include "caffe2/opt/passes.h"
 #include "caffe2/opt/sink.h"
 #include "caffe2/utils/cpuid.h"
 #include "caffe2/utils/string_utils.h"
@@ -1481,6 +1482,45 @@ void addGlobalMethods(py::module& m) {
   CAFFE2_CPU_FEATURE_SUPPORT(avx2);
 
 #undef CAFFE2_CPU_FEATURE_SUPPORT
+  m.def("transform_exists", [](const std::string& transform_name) {
+    return OptimizationPassRegistry()->Has(transform_name);
+  });
+  m.def("workspace_transform_exists", [](const std::string& transform_name) {
+    return WorkspaceOptimizationPassRegistry()->Has(transform_name);
+  });
+  m.def("run_transform", [](const std::string& transform_name, py::bytes def) {
+    caffe2::NetDef proto;
+    CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));
+    auto nn = caffe2::convertToNNModule(proto);
+    auto pass = OptimizationPassRegistry()->Create(transform_name, &nn);
+
+    CAFFE_ENFORCE(pass, "Pass doesn't exist: ", transform_name);
+    pass->run();
+
+    auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);
+    std::string out;
+    new_proto.SerializeToString(&out);
+    return py::bytes(out);
+  });
+  m.def(
+      "run_workspace_transform",
+      [](const std::string& transform_name, py::bytes def) {
+        CAFFE_ENFORCE(gWorkspace);
+        caffe2::NetDef proto;
+        CAFFE_ENFORCE(
+            ParseProtoFromLargeString(def.cast<std::string>(), &proto));
+        auto nn = caffe2::convertToNNModule(proto);
+        auto pass = WorkspaceOptimizationPassRegistry()->Create(
+            transform_name, &nn, gWorkspace);
+
+        CAFFE_ENFORCE(pass, "Pass doesn't exist: ", transform_name);
+        pass->run();
+
+        auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);
+        std::string out;
+        new_proto.SerializeToString(&out);
+        return py::bytes(out);
+      });
+
   // Transformations are exposed as functions here and wrapped
   // into a python interface in transformations.py
@@ -32,7 +32,7 @@ namespace python {
 class Int8TensorFetcher : public BlobFetcherBase {
  public:
   pybind11::object Fetch(const Blob& blob) override {
-    const caffe2::int8::Int8TensorCPU src =
+    const caffe2::int8::Int8TensorCPU& src =
         blob.template Get<caffe2::int8::Int8TensorCPU>();
     const int numpy_type = CaffeToNumpyType(src.t.meta());
     CAFFE_ENFORCE(numpy_type != -1, "Int8Tensor contains unknown type data");
@@ -21,10 +21,23 @@ from __future__ import unicode_literals
 import caffe2.python._import_c_extension as C
 
 
-def addNNPACK(net):
-    net.Proto().ParseFromString(
-        C.transform_addNNPACK(net.Proto().SerializeToString())
-    )
+class Transformer(object):
+    def __init__(self):
+        pass
+
+    @classmethod
+    def runTransform(cls, transform_name, net):
+        pb = net.Proto().SerializeToString()
+        if C.transform_exists(transform_name):
+            output = C.run_transform(transform_name, pb)
+        elif C.workspace_transform_exists(transform_name):
+            output = C.run_workspace_transform(transform_name, pb)
+        else:
+            raise AttributeError('Transformation {} not found.'.format(transform_name))
+        net.Proto().ParseFromString(output)
+
+    def __getattr__(self, transform_name):
+        return lambda net: self.runTransform(transform_name, net)
 
 
 def fuseNNPACKConvRelu(net):
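A quick usage sketch of the new interface (illustrative; it mirrors the call patterns exercised by the tests below, where `AddNNPACK` is one of the registered transform names):

    from caffe2.python import core
    from caffe2.python.transformations import Transformer

    transformer = Transformer()
    net = core.Net("net")
    net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
    net.Relu(["Y"], ["Y2"])

    # __getattr__ turns any attribute access into a transform lookup, so this
    # dispatches to C.run_transform("AddNNPACK", ...) or the workspace variant
    # and rewrites net's proto in place.
    transformer.AddNNPACK(net)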
@@ -22,14 +22,11 @@ from hypothesis import given
 import hypothesis.strategies as st
 import numpy as np
 
-from caffe2.python.transformations import (
-    addNNPACK,
-    fuseNNPACKConvRelu,
-    fuseConvBN,
-    sinkMaxPool,
-)
+from caffe2.python.transformations import Transformer
 from caffe2.python import core, workspace, test_util
 
+transformer = Transformer()
+
 
 def str_compare(a, b, encoding="utf8"):
     if isinstance(a, bytes):
@@ -40,26 +37,21 @@ def str_compare(a, b, encoding="utf8"):
 
 
 class TestTransformations(test_util.TestCase):
-    def test_addNNPACK(self):
+    def test_transformer_AddNNPACK(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y2"])
-        addNNPACK(net)
+        transformer.AddNNPACK(net)
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
 
-    def test_fuseNNPACKConvRelu(self):
+    def test_transformer_FuseNNPACKConvRelu(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y2"])
-        addNNPACK(net)  # get the NNPACK engine
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 1)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 1
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation"):
@@ -69,31 +61,27 @@ class TestTransformations(test_util.TestCase):
 
     def test_noFuseNNPACKConvRelu(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y2"])
         net.Relu(["Y"], ["Y3"])
-        addNNPACK(net)  # get the NNPACK engine
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 3)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 3
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation") and str_compare(arg.s, "Relu"):
                 has_activation_arg = True
         assert not has_activation_arg
 
-    def test_fuseNNPACKConvReluNoInplace(self):
+    def test_transformer_FuseNNPACKConvReluNoInplace(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["X"])
-        addNNPACK(net)  # get the NNPACK engine
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 1)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 1
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation"):
@@ -102,16 +90,14 @@ class TestTransformations(test_util.TestCase):
         assert has_activation_arg
         assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0]
 
-    def test_fuseNNPACKConvReluInplaceRelu(self):
+    def test_transformer_FuseNNPACKConvReluInplaceRelu(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y"])
-        addNNPACK(net)  # get the NNPACK engine
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 1)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 1
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation"):
@@ -120,19 +106,15 @@ class TestTransformations(test_util.TestCase):
         assert has_activation_arg
         assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0]
 
-    def test_fuseNNPACKConvReluPingPongNaming(self):
+    def test_transformer_FuseNNPACKConvReluPingPongNaming(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["X"])
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
-        addNNPACK(net)  # get the NNPACK engine
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 2)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 2
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation"):
@@ -142,20 +124,16 @@ class TestTransformations(test_util.TestCase):
         assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0]
         assert net.Proto().op[1].output[0] != net.Proto().op[1].input[0]
 
-    def test_fuseNNPACKConvReluFollowedByMultipleInputOp(self):
+    def test_transformer_FuseNNPACKConvReluFollowedByMultipleInputOp(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y2"])
-        net.Conv(
-            ["Y2", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["Y2", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y2"])
-        addNNPACK(net)  # get the NNPACK engine
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 2)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 2
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation"):
@@ -165,20 +143,16 @@ class TestTransformations(test_util.TestCase):
         assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0]
         assert net.Proto().op[1].output[0] != net.Proto().op[1].input[0]
 
-    def test_fuseNNPACKConvReluInplaceFollowedByMultipleInputOp(self):
+    def test_transformer_FuseNNPACKConvReluInplaceFollowedByMultipleInputOp(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y"], ["Y"])
-        net.Conv(
-            ["Y", "w", "b"], ["Y2"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["Y", "w", "b"], ["Y2"], stride=1, pad=0, kernel=3, order="NCHW")
         net.Relu(["Y2"], ["Y2"])
-        addNNPACK(net)  # get the NNPACK engine
+        transformer.AddNNPACK(net)  # get the NNPACK engine
         assert str_compare(net.Proto().op[0].engine, "NNPACK")
-        fuseNNPACKConvRelu(net)
-        assert (len(net.Proto().op) == 2)
+        transformer.FuseNNPACKConvRelu(net)
+        assert len(net.Proto().op) == 2
         has_activation_arg = False
         for arg in net.Proto().op[0].arg:
             if str_compare(arg.name, "activation"):
@@ -188,14 +162,12 @@ class TestTransformations(test_util.TestCase):
         assert net.Proto().op[0].output[0] != net.Proto().op[0].input[0]
         assert net.Proto().op[1].output[0] != net.Proto().op[1].input[0]
 
-    def test_sinkMaxPool(self):
+    def test_transformer_SinkMaxPool(self):
         net = core.Net("net")
-        net.Conv(
-            ["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW"
-        )
+        net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=3, order="NCHW")
         net.MaxPool(["Y"], ["Y1"], kernel=3)
         net.Relu(["Y1"], ["Y1"])
-        sinkMaxPool(net)
+        transformer.SinkMaxPool(net)
         assert str_compare(net.Proto().op[1].type, "Relu")
         assert str_compare(net.Proto().op[2].type, "MaxPool")
 
@@ -204,9 +176,9 @@ class TestTransformations(test_util.TestCase):
         input_channels=st.integers(1, 10),
         seed=st.integers(0, 65535),
         order=st.sampled_from(["NCHW", "NHWC"]),
-        epsilon=st.floats(min_value=1e-5, max_value=1e-2)
+        epsilon=st.floats(min_value=1e-5, max_value=1e-2),
     )
-    def test_fuseConvBN(self, size, input_channels, seed, order, epsilon):
+    def test_transformer_FuseConvBN(self, size, input_channels, seed, order, epsilon):
         net = core.Net("net")
         c = input_channels
         h = size
@@ -214,31 +186,20 @@ class TestTransformations(test_util.TestCase):
         k = 3
         net.Conv(["X", "w", "b"], ["Y"], stride=1, pad=0, kernel=k, order=order)
         net.SpatialBN(
-            ["Y", "scale", "bias", "mean", "var"], ["Y2"],
+            ["Y", "scale", "bias", "mean", "var"],
+            ["Y2"],
             is_test=True,
             order=order,
-            epsilon=epsilon
+            epsilon=epsilon,
         )
 
         np.random.seed(seed)
         if order == "NCHW":
-            workspace.FeedBlob(
-                "X",
-                np.random.rand(1, c, h, w).astype(np.float32)
-            )
-            workspace.FeedBlob(
-                "w",
-                np.random.rand(c, c, k, k).astype(np.float32)
-            )
+            workspace.FeedBlob("X", np.random.rand(1, c, h, w).astype(np.float32))
+            workspace.FeedBlob("w", np.random.rand(c, c, k, k).astype(np.float32))
         else:
-            workspace.FeedBlob(
-                "X",
-                np.random.rand(1, h, w, c).astype(np.float32)
-            )
-            workspace.FeedBlob(
-                "w",
-                np.random.rand(c, k, k, c).astype(np.float32)
-            )
+            workspace.FeedBlob("X", np.random.rand(1, h, w, c).astype(np.float32))
+            workspace.FeedBlob("w", np.random.rand(c, k, k, c).astype(np.float32))
         workspace.FeedBlob("b", np.random.rand(c).astype(np.float32))
         workspace.FeedBlob("scale", np.random.rand(c).astype(np.float32))
         workspace.FeedBlob("bias", np.random.rand(c).astype(np.float32))
@@ -246,11 +207,13 @@ class TestTransformations(test_util.TestCase):
         workspace.FeedBlob("var", np.random.rand(c).astype(np.float32))
         workspace.RunNetOnce(net)
         preTransformOutput = workspace.FetchBlob("Y2")
-        fuseConvBN(net)
+        transformer.FuseConvBN(net)
 
         # Ensure fusion
-        assert (len(net.Proto().op) == 1)
+        assert len(net.Proto().op) == 1
         workspace.RunNetOnce(net)
         postTransformOutput = workspace.FetchBlob("Y2")
         # Check that there is no numerical difference
-        assert (np.allclose(preTransformOutput, postTransformOutput, rtol=1e-05, atol=1e-08))
+        assert np.allclose(
+            preTransformOutput, postTransformOutput, rtol=1e-05, atol=1e-08
+        )
@ -163,7 +163,7 @@ bool RebatchingQueue::enqueueOne(
|
||||||
auto& tensorVector = splittedInputs.back();
|
auto& tensorVector = splittedInputs.back();
|
||||||
tensorVector.reserve(inputs.size());
|
tensorVector.reserve(inputs.size());
|
||||||
for (const auto* tensorPtr : inputs) {
|
for (const auto* tensorPtr : inputs) {
|
||||||
tensorVector.push_back(*tensorPtr);
|
tensorVector.push_back(tensorPtr->Clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
return enqueue(std::move(splittedInputs));
|
return enqueue(std::move(splittedInputs));
|
||||||
|
|
|
||||||
|
|
@@ -111,19 +111,19 @@ class YellowFinOp final : public Operator<Context> {
   bool RunOnDevice() override {
     // Iter live on the CPU
 
 #define CAFFE2_YF_READ_INPUT(INPUT_NAME, VAR_NAME) \
-  const auto VAR_NAME##_tensor = Input(INPUT_NAME); \
+  const auto& VAR_NAME##_tensor = Input(INPUT_NAME); \
   VAR_NAME##_ = VAR_NAME##_tensor.template data<T>();
 
     CAFFE2_YF_READ_INPUT(PARAM, param)
     CAFFE2_YF_READ_INPUT(MOMENT, moment)
     CAFFE2_YF_READ_INPUT(LR_AVG, lr_avg)
     CAFFE2_YF_READ_INPUT(MU_AVG, mu_avg)
     CAFFE2_YF_READ_INPUT(CURV_WIN, curv_win)
     CAFFE2_YF_READ_INPUT(G_AVG, g_avg)
     CAFFE2_YF_READ_INPUT(G2_AVG, g2_avg)
     CAFFE2_YF_READ_INPUT(SCALARS_MEMORY, scalars_memory)
     CAFFE2_YF_READ_INPUT(GRAD, grad)
 #undef CAFFE2_YF_READ_OUTPUT
 
     CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER));
29 rsync_exclude.txt Normal file
@@ -0,0 +1,29 @@
+# To do syncs, check out caffe2 under ~/local, check out the fbsync branch,
+# and then execute
+# rsync -arv --delete --exclude-from=rsync_exclude.txt ./ ~/local/caffe2/
+# Make sure you do a dry run before actually doing anything.
+
+.git
+caffe/
+caffe2/fb/
+caffe2/experiments/
+third_party/
+PLATFORM
+caffe2/proto/fb_protobuf.sh
+README.facebook
+rsync_exclude.txt
+TARGETS
+.gitmodules
+.ipynb_checkpoints
+*.tmp
+
+# These two files are created by patch commands and are not needed.
+*.orig
+*.rej
+
+# We have these two files under fbcode for convenience.
+caffe2/contrib/nervana/nervana_c_api.cu
+caffe2/contrib/nervana/nervana_c_api.h
+
+# We have decided to delay open-sourcing the mobile engine of conv transpose.
+caffe2/operators/conv_transpose_op_mobile*
1 submodules/tbb-rev.txt Normal file
@@ -0,0 +1 @@
+Subproject commit 633b01ad27e012e1dc4e392c3230250d1f4967a4
@@ -340,10 +340,10 @@ TEST_CASE("integration/mnist", "[cuda]") {
   auto linear2 = model->add(Linear(50, 10), "linear2");
 
   auto forward = [&](torch::Tensor x) {
-    x = at::max_pool2d(conv1->forward(x), {2, 2}).relu();
+    x = std::get<0>(at::max_pool2d(conv1->forward(x), {2, 2})).clamp_min(0);
     x = conv2->forward(x);
     x = drop2d->forward(x);
-    x = at::max_pool2d(x, {2, 2}).relu();
+    x = std::get<0>(at::max_pool2d(x, {2, 2})).clamp_min(0);
 
     x = x.view({-1, 320});
     x = linear1->forward(x).clamp_min(0);
@@ -377,10 +377,10 @@ TEST_CASE("integration/mnist/batchnorm", "[cuda]") {
   auto linear2 = model->add(Linear(50, 10), "linear2");
 
   auto forward = [&](torch::Tensor x) {
-    x = at::max_pool2d(conv1->forward(x), {2, 2}).relu();
+    x = std::get<0>(at::max_pool2d(conv1->forward(x), {2, 2})).clamp_min(0);
     x = batchnorm2d->forward(x);
     x = conv2->forward(x);
-    x = at::max_pool2d(x, {2, 2}).relu();
+    x = std::get<0>(at::max_pool2d(x, {2, 2})).clamp_min(0);
 
     x = x.view({-1, 320});
     x = linear1->forward(x).clamp_min(0);
23 test/test_distributed_trap.py Normal file
@@ -0,0 +1,23 @@
+import os
+import tempfile
+import sys
+import random
+import __test_main__
+
+tmp_dir = tempfile.TemporaryDirectory()
+os.environ["TEMP_DIR"] = tmp_dir.name
+os.mkdir(os.path.join(tmp_dir.name, "barrier"))
+os.mkdir(os.path.join(tmp_dir.name, "test_dir"))
+init_dir_path = os.path.join(tmp_dir.name, "init_dir")
+os.mkdir(init_dir_path)
+init_method = os.environ.get('INIT_METHOD')
+if init_method is not None and init_method == "zeus":
+    os.environ['INIT_METHOD'] = 'zeus://unittest_' + \
+        str(random.randint(1, 1000000000000))
+else:
+    os.environ['INIT_METHOD'] = 'file://' + \
+        os.path.join(init_dir_path, 'shared_init_file')
+
+
+if __name__ == '__main__':
+    __test_main__.main(sys.argv)
7 third_party/nccl/CMakeLists.txt vendored
@@ -7,13 +7,14 @@ ENDIF()
 
 include("${CMAKE_UTILS_PATH}")
 torch_cuda_get_nvcc_gencode_flag(NVCC_GENCODE)
-string(REPLACE "-gencode;" "-gencode=" NVCC_GENCODE "${NVCC_GENCODE}")
-message(STATUS "Set NVCC_GENCODE for building NCCL: ${NVCC_GENCODE}")
+string(REPLACE ";" " " NVCC_GENCODE "${NVCC_GENCODE}")
+string(REPLACE "-gencode " "-gencode=" NVCC_GENCODE "${NVCC_GENCODE}")
+message(INFO "Set NVCC_GENCODE for building NCCL: ${NVCC_GENCODE}")
 
 ADD_CUSTOM_COMMAND(
   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
   OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib/libnccl.so
-  COMMAND env CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR} NVCC=${CUDA_NVCC_EXECUTABLE} BUILDDIR=${CMAKE_CURRENT_BINARY_DIR} NVCC_GENCODE="${NVCC_GENCODE}" make -j${NUM_JOBS}
+  COMMAND env CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR} NVCC=${CUDA_NVCC_EXECUTABLE} BUILDDIR=${CMAKE_CURRENT_BINARY_DIR} NVCC_GENCODE="${NVCC_GENCODE}" make -j `getconf _NPROCESSORS_ONLN`
 )
 
 ADD_CUSTOM_TARGET(nccl ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/lib/libnccl.so)
@@ -915,11 +915,11 @@
 - name: fractional_max_pool2d_forward(Tensor self, IntList kernel_size, IntList output_size, Tensor random_samples)
   self: fractional_max_pool2d_backward(grad, self, kernel_size, output_size, indices)
 
-- name: max_pool2d_with_indices_forward(Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode)
-  self: max_pool2d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode, indices)
+- name: max_pool2d_forward(Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode)
+  self: max_pool2d_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode, indices)
 
-- name: max_pool3d_with_indices_forward(Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode)
-  self: max_pool3d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode, indices)
+- name: max_pool3d_forward(Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode)
+  self: max_pool3d_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode, indices)
 
 - name: max_unpool2d_forward(Tensor self, Tensor indices, IntList output_size)
   self: max_unpool2d_backward(grad, self, indices, output_size)
@@ -1041,11 +1041,11 @@
   grad_output: leaky_relu_backward(grad, self, negative_slope)
   self: zeros_like(grad)
 
-- name: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
+- name: max_pool2d_backward(Tensor grad_output, Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
   grad_output: max_pool_double_backward(grad, indices, 2);
   self: zeros_like(self)
 
-- name: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
+- name: max_pool3d_backward(Tensor grad_output, Tensor self, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices)
   grad_output: max_pool_double_backward(grad, indices, 3);
   self: zeros_like(self)
@@ -25,7 +25,7 @@ SKIP_PYTHON_BINDINGS = [
     'index',
     '_indexCopy_', 'max_values', 'min_values', 'argmax', 'argmin',
     '_cumsum.*', '_cumprod.*', '_sum.*', '_prod.*', '_th_sum.*', '_th_prod.*',
-    'arange.*', 'range.*', '_gesv.*', 'slice', 'max_pool1d', 'max_pool2d', 'max_pool3d'
+    'arange.*', 'range.*', '_gesv.*', 'slice',
 ]
 
 PY_VARIABLE_METHOD_VARARGS = CodeTemplate("""\
@@ -37,6 +37,7 @@ outputs = [
     'torch/csrc/autograd/generated/python_nn_functions_dispatch.h',
     'torch/csrc/autograd/generated/python_variable_methods.cpp',
     'torch/csrc/autograd/generated/python_variable_methods_dispatch.h',
+    'torch/csrc/autograd/generated/variable_factories.h',
     'torch/csrc/autograd/generated/VariableType.cpp',
     'torch/csrc/autograd/generated/VariableType.h',
     'torch/csrc/jit/generated/aten_dispatch.cpp',
@@ -341,7 +341,7 @@ def max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1,
 
     See :class:`~torch.nn.MaxPool1d` for details.
     """
-    ret = torch.max_pool1d_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
+    ret = torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)
     return ret if return_indices else ret[0]
 
 
@@ -352,7 +352,7 @@ def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
 
     See :class:`~torch.nn.MaxPool2d` for details.
     """
-    ret = torch._C._nn.max_pool2d_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
+    ret = torch._C._nn.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
     return ret if return_indices else ret[0]
 
 
@@ -363,7 +363,7 @@ def max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1,
 
     See :class:`~torch.nn.MaxPool3d` for details.
    """
-    ret = torch._C._nn.max_pool3d_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
+    ret = torch._C._nn.max_pool3d(input, kernel_size, stride, padding, dilation, ceil_mode)
    return ret if return_indices else ret[0]
|
||||||
return g.op('Softplus', self)
|
return g.op('Softplus', self)
|
||||||
|
|
||||||
|
|
||||||
def max_pool1d_with_indices(g, input, kernel_size, stride, padding, dilation, ceil_mode):
|
def max_pool1d(g, input, kernel_size, stride, padding, dilation, ceil_mode):
|
||||||
if ceil_mode:
|
if ceil_mode:
|
||||||
return _unimplemented("max_pool1d_with_indices", "ceil_mode")
|
return _unimplemented("max_pool1d", "ceil_mode")
|
||||||
if set(_single(dilation)) != {1}:
|
if set(_single(dilation)) != {1}:
|
||||||
return _unimplemented("max_pool1d_with_indices", "dilation")
|
return _unimplemented("max_pool1d", "dilation")
|
||||||
if stride is None:
|
if stride is None:
|
||||||
stride = kernel_size
|
stride = kernel_size
|
||||||
r = g.op("MaxPool", input,
|
r = g.op("MaxPool", input,
|
||||||
|
|
@ -410,11 +410,11 @@ def max_pool1d_with_indices(g, input, kernel_size, stride, padding, dilation, ce
|
||||||
return r, None
|
return r, None
|
||||||
|
|
||||||
|
|
||||||
def max_pool2d_with_indices(g, input, kernel_size, stride, padding, dilation, ceil_mode):
|
def max_pool2d(g, input, kernel_size, stride, padding, dilation, ceil_mode):
|
||||||
if ceil_mode:
|
if ceil_mode:
|
||||||
return _unimplemented("max_pool2d_with_indices", "ceil_mode")
|
return _unimplemented("max_pool2d", "ceil_mode")
|
||||||
if set(_pair(dilation)) != {1}:
|
if set(_pair(dilation)) != {1}:
|
||||||
return _unimplemented("max_pool2d_with_indices", "dilation")
|
return _unimplemented("max_pool2d", "dilation")
|
||||||
if not stride:
|
if not stride:
|
||||||
stride = kernel_size
|
stride = kernel_size
|
||||||
r = g.op("MaxPool", input,
|
r = g.op("MaxPool", input,
|
||||||
|
|
@ -424,11 +424,11 @@ def max_pool2d_with_indices(g, input, kernel_size, stride, padding, dilation, ce
|
||||||
return r, None
|
return r, None
|
||||||
|
|
||||||
|
|
||||||
def max_pool3d_with_indices(g, input, kernel_size, stride, padding, dilation, ceil_mode):
|
def max_pool3d(g, input, kernel_size, stride, padding, dilation, ceil_mode):
|
||||||
if ceil_mode:
|
if ceil_mode:
|
||||||
return _unimplemented("max_pool3d_with_indices", "ceil_mode")
|
return _unimplemented("max_pool3d", "ceil_mode")
|
||||||
if set(_triple(dilation)) != {1}:
|
if set(_triple(dilation)) != {1}:
|
||||||
return _unimplemented("max_pool3d_with_indices", "dilation")
|
return _unimplemented("max_pool3d", "dilation")
|
||||||
if not stride:
|
if not stride:
|
||||||
stride = kernel_size
|
stride = kernel_size
|
||||||
r = g.op("MaxPool", input,
|
r = g.op("MaxPool", input,
|
||||||
|
|
|
||||||
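Since each symbolic returns `(r, None)`, the indices output is simply dropped on export and only the ONNX MaxPool node survives. A minimal export sketch (illustrative; the module and file name are arbitrary examples, not from the diff):

    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.MaxPool2d(2))
    dummy = torch.randn(1, 3, 16, 16)
    # The MaxPool node is emitted via the max_pool2d symbolic above; ceil_mode
    # or non-trivial dilation would hit the "unimplemented" checks there.
    torch.onnx.export(model, dummy, "pool.onnx")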