Mirror of https://github.com/zebrajr/pytorch.git

Commit bcea409c82 ("sync"), parent f01f2063dd
@@ -11,12 +11,70 @@ CAFFE2_DEFINE_int(
     "Chunk size to split tensor data into");
 
 namespace caffe2 {
 
+namespace {
+/**
+ * @brief StringSerializer is the serializer for String.
+ *
+ * StringSerializer takes in a blob that contains a String, and serializes it
+ * into a BlobProto protocol buffer.
+ */
+class StringSerializer : public BlobSerializerBase {
+ public:
+  StringSerializer() {}
+  ~StringSerializer() {}
+  /**
+   * Serializes a Blob. Note that this blob has to contain a std::string,
+   * otherwise this function produces a fatal error.
+   */
+  void Serialize(
+      const Blob& blob,
+      const string& name,
+      SerializationAcceptor acceptor) override {
+    CHECK(blob.IsType<std::string>());
+
+    BlobProto blob_proto;
+    blob_proto.set_name(name);
+    blob_proto.set_type("std::string");
+    blob_proto.set_content(blob.template Get<std::string>());
+    acceptor(name, blob_proto.SerializeAsString());
+  }
+};
+
+/**
+ * @brief StringDeserializer is the deserializer for Strings.
+ *
+ */
+class StringDeserializer : public BlobDeserializerBase {
+ public:
+  bool Deserialize(const BlobProto& proto, Blob* blob) override {
+    *blob->GetMutable<std::string>() = proto.content();
+    return true;
+  }
+};
+}  // namespace
+
+namespace {
+
+// We can't use DeviceType_Name because of a protobuf-lite constraint.
+std::string tensorDeviceTypeName(const DeviceType& d) {
+  switch (d) {
+    case CPU:
+      return "TensorCPU";
+    case CUDA:
+      return "TensorCUDA";
+    default:
+      CAFFE_THROW("Unknown device: ", d);
+      return "";
+  }
+};
+}  // namespace
+
 // The blob serialization member function implementation.
 void Blob::Serialize(
     const string& name,
     BlobSerializerBase::SerializationAcceptor acceptor) const {
   std::unique_ptr<BlobSerializerBase> serializer(CreateSerializer(meta_.id()));
+  CAFFE_ENFORCE(serializer, "No known serializer for ", meta_.name());
   serializer->Serialize(*this, name, acceptor);
 }
@@ -33,7 +91,6 @@ std::string Blob::Serialize(const string& name) const {
   return data.str();
 }
 
-
 // Specialization for StoreDeviceDetail for CPU - nothing needs to be done.
 template <>
 void TensorSerializer<CPUContext>::StoreDeviceDetail(
@@ -60,9 +117,8 @@ bool Blob::Deserialize(const BlobProto& blob_proto) {
   if (blob_proto.has_tensor()) {
     // This is a tensor object. Depending on the device type, we will
     // use the corresponding TensorDeserializer.
-    auto deserializer = CreateDeserializer(
-        "Tensor" +
-        DeviceType_Name(blob_proto.tensor().device_detail().device_type()));
+    auto deserializer = CreateDeserializer(tensorDeviceTypeName(
+        blob_proto.tensor().device_detail().device_type()));
     // Tensor's deserializer should always be registered, but we will double
     // check if it is not null anyway.
     return CHECK_NOTNULL(deserializer.get())->Deserialize(blob_proto, this);
@@ -82,5 +138,8 @@ REGISTER_BLOB_SERIALIZER(
     (TypeMeta::Id<TensorCPU>()),
     TensorSerializer<CPUContext>);
 REGISTER_BLOB_DESERIALIZER(TensorCPU, TensorDeserializer<CPUContext>);
+// Serialize std::string
+REGISTER_BLOB_SERIALIZER((TypeMeta::Id<std::string>()), StringSerializer);
+REGISTER_BLOB_DESERIALIZER(std::string, StringDeserializer);
 }  // namespace
 }  // namespace caffe2
@@ -69,6 +69,20 @@ TEST(BlobTest, BlobWrongType) {
   ASSERT_THROW(blob.Get<int>(), EnforceNotMet);
 }
 
+TEST(BlobTest, StringSerialization) {
+  const std::string kTestString = "Hello world?";
+  Blob blob;
+  *blob.GetMutable<std::string>() = kTestString;
+
+  string serialized = blob.Serialize("test");
+  BlobProto proto;
+  CHECK(proto.ParseFromString(serialized));
+  EXPECT_EQ(proto.name(), "test");
+  EXPECT_EQ(proto.type(), "std::string");
+  EXPECT_FALSE(proto.has_tensor());
+  EXPECT_EQ(proto.content(), kTestString);
+}
+
 TEST(TensorNonTypedTest, TensorChangeType) {
   vector<int> dims(3);
   dims[0] = 2;
@@ -5,8 +5,9 @@
 #include <cstdlib>
 #include <random>
 
-#include "caffe2/proto/caffe2.pb.h"
 #include "caffe2/core/logging.h"
+#include "caffe2/core/typeid.h"
+#include "caffe2/proto/caffe2.pb.h"
 #include "caffe2/utils/math.h"
 
 namespace caffe2 {
@@ -103,6 +104,7 @@ class CPUContext final {
   // Two copy functions that deals with cross-device copies.
   template <class SrcContext, class DstContext>
   inline void CopyBytes(size_t nbytes, const void* src, void* dst);
 
   template <typename T, class SrcContext, class DstContext>
   inline void Copy(size_t n, const T* src, T* dst) {
     if (std::is_fundamental<T>::value) {
@@ -116,6 +118,16 @@ class CPUContext final {
     }
   }
 
+  template <class SrcContext, class DstContext>
+  inline void
+  CopyItems(const TypeMeta& meta, size_t n, const void* src, void* dst) {
+    if (meta.copy()) {
+      meta.copy()(src, dst, n);
+    } else {
+      CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
+    }
+  }
+
  protected:
   // TODO(jiayq): instead of hard-coding a generator, make it more flexible.
   int random_seed_{1701};
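Note: CopyItems is the piece that lets tensors hold non-POD element types. If the TypeMeta carries a registered per-item copy function, that is used; otherwise the copy degenerates to raw CopyBytes. A minimal standalone sketch of the same dispatch, using a simplified Meta struct in place of caffe2's TypeMeta (the names here are illustrative, not the real API):

    #include <cstring>
    #include <string>
    #include <vector>

    // Simplified stand-in for caffe2::TypeMeta: an item size plus an optional
    // per-item copy function for types that cannot be memcpy'd.
    struct Meta {
      size_t itemsize;
      void (*copy)(const void* src, void* dst, size_t n);
    };

    // Per-item copier used for std::string: invokes the copy assignment
    // operator instead of copying raw bytes.
    void CopyStrings(const void* src, void* dst, size_t n) {
      auto* s = static_cast<const std::string*>(src);
      auto* d = static_cast<std::string*>(dst);
      for (size_t i = 0; i < n; ++i) d[i] = s[i];
    }

    // Mirrors the dispatch in CPUContext::CopyItems: typed copy if the meta
    // provides one, raw byte copy (memcpy on CPU) otherwise.
    void CopyItems(const Meta& meta, size_t n, const void* src, void* dst) {
      if (meta.copy) {
        meta.copy(src, dst, n);
      } else {
        std::memcpy(dst, src, n * meta.itemsize);
      }
    }

    int main() {
      std::vector<std::string> a{"x", "y"}, b(2);
      CopyItems(Meta{sizeof(std::string), CopyStrings}, 2, a.data(), b.data());

      std::vector<float> f{1.f, 2.f}, g(2);
      CopyItems(Meta{sizeof(float), nullptr}, 2, f.data(), g.data());
      return (b[1] == "y" && g[1] == 2.f) ? 0 : 1;
    }

Tensor::Extend below relies on exactly this helper so that growing a tensor of strings does not corrupt its elements.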
@@ -7,6 +7,16 @@ thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_;
 
 namespace {
 bool Caffe2UsePinnedCPUAllocator(int*, char***) {
+#ifdef __SANITIZE_ADDRESS__
+  // Note(jiayq): for more details, see
+  //     https://github.com/google/sanitizers/issues/629
+  LOG(WARNING) << "There are known issues between address sanitizer and "
+                  "cudaMallocHost. As a result, caffe2 will not enable pinned "
+                  "memory allocation in asan mode. If you are expecting any "
+                  "behavior that depends on asan, be advised that it is not "
+                  "turned on.";
+  return true;
+#else
   if (!HasCudaGPU()) {
     VLOG(1) << "No GPU present. I won't use pinned allocator then.";
     return true;
@@ -14,6 +24,7 @@ bool Caffe2UsePinnedCPUAllocator(int*, char***) {
   VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator.";
   SetCPUAllocator(new PinnedCPUAllocator());
   return true;
+#endif
 }
 
 REGISTER_CAFFE2_INIT_FUNCTION(Caffe2UsePinnedCPUAllocator,
@@ -116,7 +116,9 @@ CAFFE_DECLARE_REGISTRY(Caffe2DBRegistry, DB, const string&, Mode);
  */
 inline unique_ptr<DB> CreateDB(
     const string& db_type, const string& source, Mode mode) {
-  return Caffe2DBRegistry()->Create(db_type, source, mode);
+  auto result = Caffe2DBRegistry()->Create(db_type, source, mode);
+  VLOG(1) << ((!result) ? "not found db " : "found db ") << db_type;
+  return result;
 }
 
 /**
@@ -68,13 +68,13 @@ class InitRegisterer {
 
 #define REGISTER_CAFFE2_INIT_FUNCTION(name, function, description)  \
   namespace {                                                       \
-  ::caffe2::InitRegisterer g_caffe2_initregisterer_name(            \
+  ::caffe2::InitRegisterer g_caffe2_initregisterer_##name(          \
       function, false, description);                                \
   }  // namespace
 
 #define REGISTER_CAFFE2_EARLY_INIT_FUNCTION(name, function, description) \
   namespace {                                                            \
-  ::caffe2::InitRegisterer g_caffe2_initregisterer_name(                 \
+  ::caffe2::InitRegisterer g_caffe2_initregisterer_##name(               \
      function, true, description);                                       \
   }  // namespace
 
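Note: the `##` is the substantive fix here. Without token pasting, every expansion of the macro declares the same identifier, g_caffe2_initregisterer_name, so registering two init functions in one translation unit would be a redefinition error. A minimal sketch of the difference, with a hypothetical RegisterFn standing in for the InitRegisterer machinery:

    #include <cstdio>

    int RegisterFn(const char* n) {
      std::printf("registered %s\n", n);
      return 0;
    }

    // Before the fix: every expansion would declare the same `g_reg_name`,
    // so two registrations in one file collide.
    // #define REGISTER(name) static int g_reg_name = RegisterFn(#name)

    // After the fix: token pasting yields a unique identifier per call site.
    #define REGISTER(name) static int g_reg_##name = RegisterFn(#name)

    REGISTER(alpha);  // declares g_reg_alpha
    REGISTER(beta);   // declares g_reg_beta; no collision

    int main() { return 0; }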
@@ -1,14 +1,70 @@
 #include "caffe2/core/net.h"
 
 #include "caffe2/core/context_gpu.h"
+#include "caffe2/core/flags.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/core/timer.h"
 #include "caffe2/proto/caffe2.pb.h"
 
+#ifdef CAFFE2_USE_NVTX
+#include <nvToolsExt.h>
+#endif
+
+CAFFE2_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");
+
 namespace caffe2 {
 
 namespace {
 
+using Color = int32_t;
+constexpr Color kRunColor = 0x0000CCFF;    // blue
+constexpr Color kRecordColor = 0x00FF3300; // red
+constexpr Color kWaitColor = 0x0066FF33;   // green
+
+#ifdef CAFFE2_USE_NVTX
+
+class ProfiledRange {
+ public:
+  ProfiledRange(const OperatorDef& def, Color color) {
+    if (!FLAGS_caffe2_use_nvtx) {
+      return;
+    }
+    nvtxEventAttributes_t eventAttrib = {0};
+    eventAttrib.version = NVTX_VERSION;
+    eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+    eventAttrib.colorType = NVTX_COLOR_ARGB;
+    eventAttrib.color = color;
+    eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+    eventAttrib.message.ascii = def.type().c_str();
+    range_ = nvtxRangeStartEx(&eventAttrib);
+    CHECK(range_);
+  }
+
+  ~ProfiledRange() {
+    if (!FLAGS_caffe2_use_nvtx) {
+      return;
+    }
+    nvtxRangeEnd(range_);
+  }
+
+ private:
+  nvtxRangeId_t range_ = 0;
+  DISABLE_COPY_AND_ASSIGN(ProfiledRange);
+};
+
+#else
+
+class ProfiledRange {
+ public:
+  ProfiledRange(const OperatorDef& def, Color color) {}
+
+ private:
+  DISABLE_COPY_AND_ASSIGN(ProfiledRange);
+};
+
+#endif  // ifdef CAFFE2_USE_NVTX
+
 struct Stream;
 
 struct Event {
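Note: ProfiledRange is a scope guard. The NVTX range opens in the constructor and is guaranteed to close in the destructor, even on early return from the annotated scope, and the empty fallback class compiles to nothing when CAFFE2_USE_NVTX is off. A generic sketch of the same RAII pattern using a wall-clock timer instead of NVTX, so it runs without CUDA:

    #include <chrono>
    #include <cstdio>

    // Scope guard in the spirit of ProfiledRange: work happens between
    // construction and destruction, and the destructor always runs.
    class ScopedRange {
     public:
      explicit ScopedRange(const char* label)
          : label_(label), start_(std::chrono::steady_clock::now()) {}
      ~ScopedRange() {
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - start_)
                      .count();
        std::printf("%s: %lld us\n", label_, static_cast<long long>(us));
      }
      ScopedRange(const ScopedRange&) = delete;
      ScopedRange& operator=(const ScopedRange&) = delete;

     private:
      const char* label_;
      std::chrono::steady_clock::time_point start_;
    };

    int main() {
      ScopedRange r("run");  // analogous to ProfiledRange(def, kRunColor)
      volatile long x = 0;
      for (long i = 0; i < 1000000; ++i) x += i;
      return 0;
    }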
@@ -69,6 +125,7 @@ struct Stream {
 
   int gpu_id_{-1};
   cudaStream_t stream_{nullptr};
 
  private:
   DISABLE_COPY_AND_ASSIGN(Stream);
 };
@@ -128,18 +185,24 @@ class AsyncDAGNet : public DAGNetBase {
     }));
 
     for (auto source_parent_idx : operator_nodes_[source_idx].parents_) {
+      ProfiledRange r(
+          operator_nodes_[source_parent_idx].operator_->def(), kWaitColor);
       stream.wait(events_[source_parent_idx].get());
     }
 
     // We've waited on all our parent indices.
     bool success = true;
     for (auto idx : chain) {
+      ProfiledRange r(operator_nodes_[idx].operator_->def(), kRunColor);
       success &= operator_nodes_[idx].operator_->RunAsync();
     }
 
     // Record an event for the sink of the chain.
     const auto& sink_idx = chain.back();
+    {
+      ProfiledRange r(operator_nodes_[sink_idx].operator_->def(), kRecordColor);
       events_[sink_idx]->record(stream);
+    }
     CHECK(!eventRecorded_[sink_idx]);
     eventRecorded_[sink_idx] = 1;
     return success;
@@ -157,9 +220,11 @@ class AsyncDAGNet : public DAGNetBase {
     Stream stream{device_option};
 
     // Potential optimization: we can pre-compute outstanding events.
-    for (auto& event : events_) {
+    for (auto i = 0; i < events_.size(); ++i) {
+      auto& event = events_[i];
       if (event->outstanding_) {
         VLOG(2) << "Synchronizing host on outstanding event";
+        ProfiledRange r(operator_nodes_[i].operator_->def(), kWaitColor);
         stream.wait(event.get());
       }
     }
@@ -2,17 +2,6 @@
 #include "caffe2/core/flags.h"
 
 CAFFE2_DEFINE_bool(
-    caffe2_keep_on_shrink, false,
+    caffe2_keep_on_shrink,
+    true,
     "If set, keeps memory when a tensor is shrinking its size.");
-
-namespace caffe2 {
-
-namespace detail {
-
-vector<TIndex>& shape(size_t n) {
-  static thread_local vector<TIndex> r;
-  r.resize(n);
-  return r;
-}
-}
-}
@@ -150,6 +150,40 @@ class Tensor {
 
   virtual ~Tensor() {}
 
+  /**
+   * @brief Extends the outer-most dimension of this tensor by num elements,
+   * preserving the existing data.
+   *
+   * The underlying data may be reallocated in order to accommodate the new
+   * elements, in which case this tensor's capacity is grown at a factor of
+   * growthPct. This ensures that Extend runs in amortized O(1) time
+   * complexity.
+   */
+  template <class ContextForCopy>
+  void Extend(TIndex num, int growthPct, ContextForCopy* context) {
+    CHECK_GE(dims_.size(), 1);
+    auto oldSize = size_;
+    auto newDims = dims_;
+    newDims[0] += num;
+    if (!data_) {
+      Resize(newDims);
+      return;
+    }
+    auto newSize = std::accumulate(
+        newDims.begin(), newDims.end(), 1, std::multiplies<TIndex>());
+    if (newSize * meta_.itemsize() > capacity_) {
+      auto newCapacity = dims_;
+      newCapacity[0] = std::max(newDims[0], dims_[0] * (growthPct + 100) / 100);
+      auto oldData = std::move(data_);
+      Resize(newCapacity);
+      auto* newData = raw_mutable_data(meta_);
+      context->template CopyItems<ContextForCopy, ContextForCopy>(
+          meta_, oldSize, oldData.get(), newData);
+    }
+    dims_ = newDims;
+    size_ = newSize;
+  }
+
   /**
    * @brief Resizes a tensor.
    *
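Note on the amortized O(1) claim: the new capacity is at least dims_[0] * (growthPct + 100) / 100, so repeatedly extending by one row reallocates only logarithmically often and the total bytes copied stay linear in the final size. A rough sketch of the growth rule with illustrative numbers; this models only the capacity arithmetic, not the real Tensor bookkeeping:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int growthPct = 40;
      long dim0 = 1, capacity0 = 1, reallocs = 0;
      // Extend by one outer row at a time, as Tensor::Extend(1, 40, ctx) would.
      for (int step = 0; step < 10000; ++step) {
        ++dim0;
        if (dim0 > capacity0) {
          capacity0 = std::max(dim0, capacity0 * (growthPct + 100) / 100);
          ++reallocs;  // this is when Extend copies the old data over
        }
      }
      // Far fewer reallocations than rows: geometric growth at work.
      std::printf("rows=%ld capacity=%ld reallocs=%ld\n",
                  dim0, capacity0, reallocs);
      return 0;
    }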
@@ -297,9 +331,12 @@ class Tensor {
     CHECK(data_.get() || size_ == 0)
         << "The tensor is uninitialized. You probably need to call "
         << "Resize() and mutable_data() first.";
-    CHECK(IsType<T>())
-        << "Tensor type mistmatch, caller expects elements to be "
-        << TypeMeta::Name<T>() << " while tensor contains " << meta_.name();
+    CAFFE_ENFORCE(
+        IsType<T>(),
+        "Tensor type mismatch, caller expects elements to be ",
+        TypeMeta::Name<T>(),
+        " while tensor contains ",
+        meta_.name());
     return static_cast<T*>(data_.get());
   }
 
@@ -12,34 +12,39 @@
 namespace caffe2 {
 
 namespace {
-// Returns a function that returns `true` if we should continue
-// iterating, given the current iteration count.
-std::function<bool(int)> getContinuationTest(
-    Workspace* ws,
-    const ExecutionStep& step) {
-  if (step.has_criteria_network()) {
-    CHECK(!step.has_num_iter())
-        << "Must not specify num_iter if critera_network is set";
-  }
-  if (!step.has_criteria_network()) {
-    int iterations = step.has_num_iter() ? step.num_iter() : 1;
-    VLOG(1) << "Executing step for " << iterations << " iterations.";
-    return [=](int i) { return i < iterations; };
-  }
-  auto* criteria_network = ws->GetNet(step.criteria_network());
-  CHECK_NOTNULL(criteria_network);
-  CHECK_EQ(criteria_network->external_output().size(), 1);
-  const auto& criteria_output = criteria_network->external_output().front();
-  VLOG(1) << "Executing step controlled by criteria output: "
-          << criteria_output;
-  return [=](int) {
-    criteria_network->Run();
-    const auto& blob = ws->GetBlob(criteria_output)->Get<TensorCPU>();
-    CHECK_EQ(blob.size(), 1);
-    CHECK(blob.IsType<bool>());
-    return blob.template data<bool>()[0] > 0;
-  };
+// Try to get the should_stop signal, a scalar bool blob value.
+// If the blob doesn't exist or is not initialized, return false.
+const bool getShouldStop(const Blob* b) {
+  if (!b || !b->meta().id()) { // does not exist or uninitialized
+    return false;
+  }
+
+  const auto& t = b->Get<TensorCPU>();
+  CAFFE_ENFORCE(t.IsType<bool>() && t.size() == 1, "expects a scalar boolean");
+  return *(t.template data<bool>());
+}
+
+// Returns a function that returns `true` if we should continue
+// iterating, given the current iteration count.
+std::function<bool(int64_t)> getContinuationTest(
+    Workspace* ws,
+    const ExecutionStep& step) {
+  if (step.has_should_stop_blob()) {
+    CAFFE_ENFORCE(
+        !step.has_num_iter(),
+        "Must not specify num_iter if should_stop_blob is set");
+  }
+
+  if (!step.has_should_stop_blob()) {
+    int64_t iterations = step.has_num_iter() ? step.num_iter() : 1;
+    VLOG(1) << "Will execute step " << step.name() << " for " << iterations
+            << " iterations.";
+    return [=](int64_t i) { return i < iterations; };
+  } else {
+    VLOG(1) << "Will execute step " << step.name() << " until stopped by blob "
+            << step.should_stop_blob();
+    return [](int64_t i) { return true; };
+  }
 };
 }  // namespace
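Note: returning std::function<bool(int64_t)> lets the caller AND the step's own continuation test with an externally supplied one; ExecuteStepRecursive below builds exactly that conjunction. A minimal sketch of the composition with hypothetical predicates, independent of the Workspace API:

    #include <cstdio>
    #include <functional>

    using ShouldContinue = std::function<bool(int64_t)>;

    int main() {
      // Analogue of a num_iter-based test: run 5 iterations.
      ShouldContinue netShouldContinue = [](int64_t i) { return i < 5; };
      // Analogue of an external stop signal: give up after 3 iterations.
      ShouldContinue externalShouldContinue = [](int64_t i) { return i < 3; };

      // The same conjunction ExecuteStepRecursive builds before its loop.
      auto shouldContinue = [&](int64_t iter) {
        return externalShouldContinue(iter) && netShouldContinue(iter);
      };

      for (int64_t iter = 0; shouldContinue(iter); ++iter) {
        std::printf("iteration %lld\n", static_cast<long long>(iter));
      }
      return 0;  // prints iterations 0, 1, 2
    }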
@@ -229,10 +234,17 @@ struct Reporter {
   }
 
+#define CHECK_SHOULD_STOP(shouldStop)                     \
+  if (getShouldStop(shouldStop)) {                        \
+    VLOG(1) << "Execution stopped by should_stop_blob";   \
+    return true;                                          \
+  }
+
 bool Workspace::ExecuteStepRecursive(
     const ExecutionStep& step,
     ShouldContinue externalShouldContinue) {
-  LOG(INFO) << "Running execution step " << step.name();
+  VLOG(1) << "Running execution step " << step.name();
 
   if (!(step.substep_size() == 0 || step.network_size() == 0)) {
     LOG(ERROR) << "An ExecutionStep should either have substep or networks "
                << "but not both.";
@@ -247,22 +259,40 @@ bool Workspace::ExecuteStepRecursive(
     if (net_map_.count(step.report_net()) == 0) {
       LOG(ERROR) << "Report net " << step.report_net() << " not found.";
     }
+    VLOG(1) << "Starting reporter net";
     reporter.start(net_map_[step.report_net()].get(), step.report_interval());
   }
 
+  const Blob* shouldStop = nullptr;
+  if (step.has_should_stop_blob()) {
+    shouldStop = GetBlob(step.should_stop_blob());
+    CAFFE_ENFORCE(
+        shouldStop, "blob ", step.should_stop_blob(), " does not exist");
+  }
+
   const auto netShouldContinue = getContinuationTest(this, step);
-  const auto shouldContinue = [&](int iter) {
+  const auto shouldContinue = [&](int64_t iter) {
     return externalShouldContinue(iter) && netShouldContinue(iter);
   };
   if (step.substep_size()) {
-    for (int iter = 0; shouldContinue(iter); ++iter) {
-      // we assume that, if we have substeps, each substep is going to take a
-      // reasonable amount of time, so logging here is fine
-      LOG(INFO) << "Execution step " << step.name()
-                << ": Starting iteration " << iter;
+    for (int64_t iter = 0; shouldContinue(iter); ++iter) {
+      VLOG(1) << "Execution step " << step.name() << ": iteration " << iter;
+
+      if (!step.concurrent_substeps() || step.substep().size() <= 1) {
+        auto substepShouldContinue = [&, externalShouldContinue](int64_t iter) {
+          return externalShouldContinue(iter);
+        };
+
+        for (auto& ss : step.substep()) {
+          if (!ExecuteStepRecursive(ss, substepShouldContinue)) {
+            return false;
+          }
+          CHECK_SHOULD_STOP(shouldStop);
+        }
+      } else {
       std::atomic<int> next_substep{0};
       std::atomic<bool> got_failure{false};
-      auto substepShouldContinue = [&, externalShouldContinue](int iter) {
+      auto substepShouldContinue = [&, externalShouldContinue](int64_t iter) {
         return !got_failure && externalShouldContinue(iter);
       };
       auto worker = [&]() {
@@ -271,26 +301,26 @@ bool Workspace::ExecuteStepRecursive(
         if (got_failure || (substep_id >= step.substep().size())) {
           break;
         }
-        if (!ExecuteStepRecursive(step.substep().Get(substep_id),
-                                  substepShouldContinue)) {
+        if (!ExecuteStepRecursive(
+                step.substep().Get(substep_id), substepShouldContinue)) {
          got_failure = true;
         }
       }
     };
-    if (!step.concurrent_substeps() || step.substep().size() <= 1) {
-      worker();
-    } else {
       std::vector<std::thread> threads;
-      for (int i = 0; i < step.substep().size(); ++i) {
+      for (int64_t i = 0; i < step.substep().size(); ++i) {
        threads.emplace_back(worker);
      }
      for (auto& thread: threads) {
        thread.join();
      }
-    }
       if (got_failure) {
         return false;
       }
+      // concurrent substeps should be careful about setting should_stop_blob
+      CHECK_SHOULD_STOP(shouldStop);
+      }
     }
     return true;
   } else {
@@ -305,16 +335,19 @@ bool Workspace::ExecuteStepRecursive(
       VLOG(1) << "Going to execute network " << network_name;
       networks.push_back(net_map_[network_name].get());
     }
-    for (int iter = 0; shouldContinue(iter); ++iter) {
+    for (int64_t iter = 0; shouldContinue(iter); ++iter) {
       VLOG(1) << "Executing network iteration " << iter;
       for (NetBase* network : networks) {
         if (!network->Run()) {
           return false;
         }
+        CHECK_SHOULD_STOP(shouldStop);
       }
     }
   }
   return true;
 }
 
+#undef CHECK_SHOULD_STOP
+
 }  // namespace caffe2
caffe2/operators/atomic_ops.cc (new file, 73 lines)
@@ -0,0 +1,73 @@
+#include <mutex>
+#include "caffe2/core/context.h"
+#include "caffe2/core/operator.h"
+
+namespace caffe2 {
+namespace fb {
+namespace {
+
+class CreateMutexOp final : public Operator<CPUContext> {
+ public:
+  CreateMutexOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<CPUContext>(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    *OperatorBase::Output<std::unique_ptr<std::mutex>>(0) =
+        std::unique_ptr<std::mutex>(new std::mutex);
+    return true;
+  }
+};
+
+class AtomicFetchAddOp final : public Operator<CPUContext> {
+ public:
+  AtomicFetchAddOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<CPUContext>(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0);
+    auto& a = Input(1);
+    auto& b = Input(2);
+    auto* c = Output(0);
+    auto* d = Output(1);
+    c->Resize(std::vector<TIndex>());
+    d->Resize(std::vector<TIndex>());
+    auto* aPtr = a.data<int32_t>();
+    auto* bPtr = b.data<int32_t>();
+    auto* cPtr = c->mutable_data<int32_t>();
+    auto* dPtr = d->mutable_data<int32_t>();
+    std::lock_guard<std::mutex> lg(*mutex);
+    *dPtr = *aPtr;
+    *cPtr = *aPtr + *bPtr;
+    return true;
+  }
+};
+
+REGISTER_CPU_OPERATOR(CreateMutex, CreateMutexOp);
+REGISTER_CPU_OPERATOR(AtomicFetchAdd, AtomicFetchAddOp);
+
+OPERATOR_SCHEMA(CreateMutex)
+    .NumInputs(0)
+    .NumOutputs(1)
+    .SetDoc("Creates an unlocked mutex and returns it in a unique_ptr blob.")
+    .Output(0, "mutex_ptr", "Blob containing a std::unique_ptr<mutex>.");
+
+OPERATOR_SCHEMA(AtomicFetchAdd)
+    .NumInputs(3)
+    .NumOutputs(2)
+    .SetDoc(R"DOC(
+Given a mutex and two int32 scalar tensors, performs an atomic fetch add
+by mutating the first argument and adding it to the second input
+argument. Returns the updated integer and the value prior to the update.
+)DOC")
+    .Input(0, "mutex_ptr", "Blob containing a unique_ptr<mutex>.")
+    .Input(1, "mut_value", "Value to be mutated after the sum.")
+    .Input(2, "increment", "Value to add to the first operand.")
+    .Output(0, "mut_value", "Mutated value after sum. Usually same as input 1.")
+    .Output(1, "fetched_value", "Value of the first operand before sum.")
+    .AllowInplace({{1, 0}});
+
+SHOULD_NOT_DO_GRADIENT(CreateMutex);
+SHOULD_NOT_DO_GRADIENT(AtomicFetchAdd);
+}  // namespace
+}  // namespace fb
+}  // namespace caffe2
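Note: the contract of AtomicFetchAdd is that output 1 receives the old value of mut_value and output 0 receives old + increment, with the whole read-modify-write serialized by the mutex blob; AllowInplace({{1, 0}}) permits output 0 to alias input 1. A standalone sketch of the same semantics using a plain std::mutex and no Caffe2 types (the write-back shown models the aliased in-place case, which is an assumption about that configuration):

    #include <cstdint>
    #include <cstdio>
    #include <mutex>

    // The same read-modify-write AtomicFetchAdd performs under its mutex blob:
    // *fetched gets the old value, *updated gets old + increment.
    void AtomicFetchAdd(std::mutex& m, int32_t& value, int32_t increment,
                        int32_t* updated, int32_t* fetched) {
      std::lock_guard<std::mutex> lg(m);
      *fetched = value;
      *updated = value + increment;
      value = *updated;  // models the in-place case, as with AllowInplace
    }

    int main() {
      std::mutex m;
      int32_t counter = 5, updated = 0, fetched = 0;
      AtomicFetchAdd(m, counter, 2, &updated, &fetched);
      std::printf("fetched=%d updated=%d\n", fetched, updated);  // 5 and 7
      return 0;
    }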
caffe2/operators/boolean_mask_ops.cc (new file, 143 lines)
@@ -0,0 +1,143 @@
+#include "caffe2/core/operator.h"
+#include "caffe2/core/tensor.h"
+
+namespace caffe2 {
+namespace {
+
+template <class Context>
+class BooleanMaskLengthsOp final : public Operator<Context> {
+ public:
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+  BooleanMaskLengthsOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
+  }
+
+  template <typename T>
+  bool DoRunWithType() {
+    auto& lengths = Input(0);
+    auto& mask = Input(1);
+    auto* lengthsOut = Output(0);
+    CAFFE_ENFORCE(lengths.ndim() == 1);
+    CAFFE_ENFORCE(mask.ndim() == 1);
+    const auto* lengthsPtr = lengths.template data<T>();
+    const auto* maskPtr = mask.template data<bool>();
+    auto totalLength =
+        std::accumulate(lengthsPtr, lengthsPtr + lengths.size(), 0);
+    CAFFE_ENFORCE(mask.size() == totalLength);
+    lengthsOut->ResizeLike(lengths);
+    auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
+    int p = 0;
+    for (int i = 0; i < lengths.size(); ++i) {
+      T lengthOut = 0;
+      for (int j = 0; j < lengthsPtr[i]; ++j) {
+        if (maskPtr[p++]) {
+          ++lengthOut;
+        }
+      }
+      lengthsOutPtr[i] = lengthOut;
+    }
+    return true;
+  }
+};
+
+template <class Context>
+class BooleanMaskOp final : public Operator<Context> {
+ public:
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+  BooleanMaskOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    auto& data = Input(0);
+    auto& mask = Input(1);
+    auto* dataOut = Output(0);
+    CAFFE_ENFORCE(data.ndim() >= 1);
+    CAFFE_ENFORCE(mask.ndim() == 1);
+    CAFFE_ENFORCE(data.dims()[0] == mask.dims()[0]);
+
+    const auto* maskPtr = mask.template data<bool>();
+    int numOutputs = 0;
+    int outerSize = mask.size();
+    for (int i = 0; i < outerSize; ++i) {
+      if (maskPtr[i]) {
+        ++numOutputs;
+      }
+    }
+    std::vector<TIndex> outShape;
+    outShape.push_back(numOutputs);
+    outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end());
+    dataOut->Resize(outShape);
+    if (numOutputs == 0) {
+      return true;
+    }
+    auto innerSizeBytes = std::accumulate(
+                              data.dims().begin() + 1,
+                              data.dims().end(),
+                              1,
+                              std::multiplies<TIndex>()) *
+        data.meta().itemsize();
+    TIndex lastStart = -1;
+    const auto* inPtr = (char*)data.raw_data();
+    auto* outPtr = (char*)dataOut->raw_mutable_data(data.meta());
+    TIndex outStart = 0;
+    for (TIndex i = 0;; ++i) {
+      // mask was true and either a) became false, or b) sequence finished
+      if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
+        const auto* src = inPtr + lastStart * innerSizeBytes;
+        auto* dst = outPtr + outStart * innerSizeBytes;
+        int numItems = i - lastStart;
+        if (data.meta().copy()) {
+          data.meta().copy()(src, dst, numItems);
+        } else {
+          context_.template CopyBytes<CPUContext, CPUContext>(
+              numItems * data.meta().itemsize(), src, dst);
+        }
+        outStart += numItems;
+        lastStart = -1;
+      }
+      if (i >= outerSize) {
+        break;
+      }
+      // mask was false and became true
+      if (lastStart == -1 && maskPtr[i]) {
+        lastStart = i;
+      }
+    }
+    return true;
+  }
+};
+
+REGISTER_CPU_OPERATOR(BooleanMask, BooleanMaskOp<CPUContext>);
+REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
+
+OPERATOR_SCHEMA(BooleanMask)
+    .NumInputs(2)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Given a 1D data tensor and a mask (boolean) tensor of the same shape, returns
+a tensor containing only the elements corresponding to positions where the
+mask is true.
+)DOC")
+    .Input(0, "data", "The 1D, original data tensor.")
+    .Input(1, "mask", "A tensor of bools of same shape as `data`.")
+    .Output(0, "masked_data", "A tensor of same type as `data`.");
+
+OPERATOR_SCHEMA(BooleanMaskLengths)
+    .NumInputs(2)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Given a tensor of int32 segment lengths and a mask (boolean) tensor, return
+the segment lengths of a corresponding segmented tensor after BooleanMask is
+applied.
+)DOC")
+    .Input(0, "lengths", "A 1D int32 tensor representing segment lengths.")
+    .Input(1, "mask", "A 1D bool tensor of values to keep.")
+    .Output(0, "masked_lengths", "Segment lengths of a masked tensor.");
+
+NO_GRADIENT(BooleanMask);
+NO_GRADIENT(BooleanMaskLengths);
+}  // namespace
+}  // namespace caffe2
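Note: BooleanMaskOp avoids one copy per kept row by tracking runs of consecutive true mask values (lastStart) and flushing each whole run with a single copy. A standalone sketch of that run-detection loop on plain ints, illustrative only:

    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Copies the rows of `data` where mask is true, flushing contiguous runs
    // in single memcpy calls, mirroring BooleanMaskOp's lastStart/outStart loop.
    std::vector<int> BooleanMask(const std::vector<int>& data,
                                 const std::vector<bool>& mask) {
      std::vector<int> out;
      long lastStart = -1;
      for (size_t i = 0;; ++i) {
        // A run was open and either ended or the sequence finished: flush it.
        if (lastStart != -1 && (i >= mask.size() || !mask[i])) {
          size_t numItems = i - lastStart;
          size_t outStart = out.size();
          out.resize(outStart + numItems);
          std::memcpy(out.data() + outStart, data.data() + lastStart,
                      numItems * sizeof(int));
          lastStart = -1;
        }
        if (i >= mask.size()) break;
        if (lastStart == -1 && mask[i]) lastStart = i;  // a run begins
      }
      return out;
    }

    int main() {
      auto out = BooleanMask({1, 2, 3, 4, 5}, {true, true, false, true, false});
      for (int v : out) std::printf("%d ", v);  // prints: 1 2 4
      std::printf("\n");
      return 0;
    }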
@@ -10,8 +10,14 @@ OPERATOR_SCHEMA(Concat).NumInputs(1, INT_MAX).NumOutputs(2);
 // Backward compatibility names.
 REGISTER_CPU_OPERATOR(DepthSplit, SplitOp<CPUContext>);
 REGISTER_CPU_OPERATOR(DepthConcat, ConcatOp<CPUContext>);
-OPERATOR_SCHEMA(DepthSplit).NumInputs(1, 2).NumOutputs(1, INT_MAX);
-OPERATOR_SCHEMA(DepthConcat).NumInputs(1, INT_MAX).NumOutputs(2);
+OPERATOR_SCHEMA(DepthSplit)
+    .NumInputs(1, 2)
+    .NumOutputs(1, INT_MAX)
+    .SetDoc("Backward compatible operator name for Split.");
+OPERATOR_SCHEMA(DepthConcat)
+    .NumInputs(1, INT_MAX)
+    .NumOutputs(2)
+    .SetDoc("Backward compatible operator name for Concat.");
 
 class GetSplitGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
@@ -85,7 +85,9 @@ class ConcatOp final : public Operator<Context> {
 template <class Context>
 bool SplitOp<Context>::RunOnDevice() {
   auto& input = Input(0);
+  const int input_channels = input.dim32(axis_);
   const int* axis_data;
+  vector<int> equal_split;
   if (InputSize() == 2) {
     // We obtain split from the input tensor.
     CHECK_EQ(split_.size(), 0)
@@ -94,13 +96,21 @@ bool SplitOp<Context>::RunOnDevice() {
     auto& split_tensor = OperatorBase::Input<TensorCPU>(1);
     CHECK_EQ(split_tensor.size(), OutputSize());
     axis_data = split_tensor.template data<int>();
+  } else if (split_.size() == 0) {
+    CAFFE_ENFORCE(input_channels % OutputSize() == 0,
+                  "If you did not specify split explicitly, the number of "
+                  "input channels should be divisible by the output size.");
+    equal_split.resize(OutputSize(), input_channels / OutputSize());
+    axis_data = equal_split.data();
   } else {
     // We obtain split from the parameters.
-    CHECK_EQ(split_.size(), OutputSize());
+    CAFFE_ENFORCE(split_.size() == OutputSize(),
+                  "The number of splits specified should be equal to the "
+                  "number of outputs.");
     axis_data = split_.data();
   }
   CHECK_LT(axis_, input.ndim());
-  const int input_channels = input.dim32(axis_);
   CHECK_EQ(std::accumulate(axis_data, axis_data + OutputSize(), 0),
            input_channels)
       << "Sum of split dimensions do not match: should be " << input_channels;
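Note: the new branch lets SplitOp infer an even split when neither the split argument nor a split input is given: 9 input channels across 3 outputs yields {3, 3, 3}, and a non-divisible channel count is an error. A small sketch of the inference, without the operator machinery:

    #include <cassert>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    // Mirrors SplitOp's fallback: divide input channels evenly across outputs.
    std::vector<int> EqualSplit(int input_channels, int output_size) {
      assert(input_channels % output_size == 0 &&
             "input channels should be divisible by the output size");
      return std::vector<int>(output_size, input_channels / output_size);
    }

    int main() {
      auto split = EqualSplit(9, 3);  // -> {3, 3, 3}
      // SplitOp also re-validates that the splits sum to the channel count.
      int sum = std::accumulate(split.begin(), split.end(), 0);
      std::printf("splits: %d %d %d (sum %d)\n",
                  split[0], split[1], split[2], sum);
      return 0;
    }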
@@ -22,11 +22,11 @@ template <typename ArrayOfcudnnConvolutionAlgoPerf_t>
 inline void LogCuDNNPerfStats(
     const ArrayOfcudnnConvolutionAlgoPerf_t& perf_stat,
     int returned_algo_count) {
-  LOG(INFO) << "Perf result: (algo: stat, time, memory)";
+  VLOG(1) << "Perf result: (algo: stat, time, memory)";
   for (int i = 0; i < returned_algo_count; ++i) {
     const auto& stat = perf_stat[i];
-    LOG(INFO) << stat.algo << ": " << stat.status
-              << " " << stat.time << " " << stat.memory;
+    VLOG(1) << stat.algo << ": " << stat.status << " " << stat.time << " "
+            << stat.memory;
   }
 }
 }  // namespace
@@ -193,7 +193,7 @@ bool CudnnConvOp<T>::RunOnDevice() {
   if (deterministic_) {
     algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
   } else if (exhaustive_search_) {
-    LOG(INFO) << "CUDNN Convolution: doing exhaustive search.";
+    VLOG(1) << "CUDNN Convolution: doing exhaustive search.";
     // When we do an exhaustive search, we will ignore the workspace size
     // limit and simply go for the fastest algorithm. If you happen to run
     // out of memory later, you will be on your own...
@@ -229,8 +229,8 @@ bool CudnnConvOp<T>::RunOnDevice() {
         cudnn_wrapper_.inline_cudnn_handle(),
         bottom_desc_, filter_desc_, conv_desc_, top_desc_,
         algo_, &cudnn_ws_nbytes_));
-    LOG(INFO) << "CuDNN algorithm: " << algo_;
-    LOG(INFO) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
+    VLOG(1) << "CuDNN algorithm: " << algo_;
+    VLOG(1) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
   }
 
   // Now, actually run the computation.
@@ -346,7 +346,7 @@ bool CudnnConvGradientOp<T>::RunOnDevice() {
     bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
     bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
   } else if (exhaustive_search_) {
-    LOG(INFO) << "CUDNN Convolution bwd: doing exhaustive search.";
+    VLOG(1) << "CUDNN Convolution bwd: doing exhaustive search.";
     // When we do an exhaustive search, we will ignore the workspace size
     // limit and simply go for the fastest algorithm. If you happen to run
     // out of memory later, you will be on your own...
@@ -416,9 +416,9 @@ bool CudnnConvGradientOp<T>::RunOnDevice() {
         bwd_data_algo_, &bwd_data_ws_size));
     cudnn_ws_nbytes_ = std::max(bwd_filter_ws_size, bwd_data_ws_size);
 
-    LOG(INFO) << "CuDNN bwd algorithm: " << bwd_filter_algo_ << ", "
-              << bwd_data_algo_;
-    LOG(INFO) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
+    VLOG(1) << "CuDNN bwd algorithm: " << bwd_filter_algo_ << ", "
+            << bwd_data_algo_;
+    VLOG(1) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
   }
 
   // Now, actually run the computation.
579
caffe2/operators/conv_transpose_op_cudnn.cc
Normal file
579
caffe2/operators/conv_transpose_op_cudnn.cc
Normal file
|
|
@ -0,0 +1,579 @@
|
||||||
|
#include "caffe2/core/common_cudnn.h"
|
||||||
|
#include "caffe2/core/context_gpu.h"
|
||||||
|
#include "caffe2/operators/conv_transpose_op.h"
|
||||||
|
|
||||||
|
namespace caffe2 {
|
||||||
|
|
||||||
|
// Earlier in the days Caffe sets the default cudnn workspace to 8MB. We bump
|
||||||
|
// it up to 64MB in Caffe2, as this enables the use of Winograd in many cases,
|
||||||
|
// something very beneficial to more recent CNN models.
|
||||||
|
static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = 64 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Manually specified number of algorithms implemented in CuDNN.
|
||||||
|
// This does not have any performance implications, as we will always find the
|
||||||
|
// fastest algorithm; setting them to the right number of algorithms will enable
|
||||||
|
// us to best report the statistics when doing an exhaustive search, though.
|
||||||
|
static constexpr size_t kNUM_CUDNN_FWD_ALGS = 7;
|
||||||
|
static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = 4;
|
||||||
|
static constexpr size_t kNUM_CUDNN_BWD_DATA_ALGS = 5;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
template <typename ArrayOfcudnnConvolutionAlgoPerf_t>
|
||||||
|
inline void LogCuDNNPerfStats(
|
||||||
|
const ArrayOfcudnnConvolutionAlgoPerf_t& perf_stat,
|
||||||
|
int returned_algo_count) {
|
||||||
|
LOG(INFO) << "Perf result: (algo: stat, time, memory)";
|
||||||
|
for (int i = 0; i < returned_algo_count; ++i) {
|
||||||
|
const auto& stat = perf_stat[i];
|
||||||
|
LOG(INFO) << stat.algo << ": " << stat.status << " " << stat.time << " "
|
||||||
|
<< stat.memory;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
class CudnnConvTransposeOpBase : public ConvTransposeUnpoolBase<CUDAContext> {
|
||||||
|
public:
|
||||||
|
CudnnConvTransposeOpBase(const OperatorDef& operator_def, Workspace* ws)
|
||||||
|
: ConvTransposeUnpoolBase<CUDAContext>(operator_def, ws),
|
||||||
|
cudnn_wrapper_(&context_),
|
||||||
|
cudnn_ws_nbytes_limit_(OperatorBase::GetSingleArgument<size_t>(
|
||||||
|
"ws_nbytes_limit",
|
||||||
|
kCONV_CUDNN_WORKSPACE_LIMIT_BYTES)),
|
||||||
|
exhaustive_search_(
|
||||||
|
OperatorBase::GetSingleArgument<int>("exhaustive_search", 0)),
|
||||||
|
deterministic_(
|
||||||
|
OperatorBase::GetSingleArgument<int>("deterministic", 0)),
|
||||||
|
cudnn_state_(OperatorBase::GetSingleArgument<int>("cudnn_state", 0)) {
|
||||||
|
CHECK(!deterministic_ || !exhaustive_search_);
|
||||||
|
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bottom_desc_));
|
||||||
|
CUDNN_CHECK(cudnnCreateFilterDescriptor(&filter_desc_));
|
||||||
|
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_));
|
||||||
|
CUDNN_CHECK(cudnnCreateTensorDescriptor(&top_desc_));
|
||||||
|
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc_));
|
||||||
|
}
|
||||||
|
|
||||||
|
~CudnnConvTransposeOpBase() {
|
||||||
|
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bottom_desc_));
|
||||||
|
CUDNN_CHECK(cudnnDestroyFilterDescriptor(filter_desc_));
|
||||||
|
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc_));
|
||||||
|
CUDNN_CHECK(cudnnDestroyTensorDescriptor(top_desc_));
|
||||||
|
CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc_));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
vector<TIndex> cudnn_input_dims_;
|
||||||
|
vector<TIndex> cudnn_filter_dims_;
|
||||||
|
|
||||||
|
CuDNNWrapper cudnn_wrapper_;
|
||||||
|
cudnnTensorDescriptor_t bottom_desc_;
|
||||||
|
cudnnFilterDescriptor_t filter_desc_;
|
||||||
|
cudnnTensorDescriptor_t bias_desc_;
|
||||||
|
cudnnTensorDescriptor_t top_desc_;
|
||||||
|
cudnnConvolutionDescriptor_t conv_desc_;
|
||||||
|
const size_t cudnn_ws_nbytes_limit_;
|
||||||
|
size_t cudnn_ws_nbytes_;
|
||||||
|
bool exhaustive_search_;
|
||||||
|
bool deterministic_;
|
||||||
|
size_t cudnn_state_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class CudnnConvTransposeOp final : public CudnnConvTransposeOpBase {
|
||||||
|
public:
|
||||||
|
CudnnConvTransposeOp(const OperatorDef& operator_def, Workspace* ws)
|
||||||
|
: CudnnConvTransposeOpBase(operator_def, ws) {}
|
||||||
|
|
||||||
|
~CudnnConvTransposeOp() {}
|
||||||
|
|
||||||
|
bool RunOnDevice() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
cudnnConvolutionBwdDataAlgo_t bwd_data_algo_;
|
||||||
|
// Input: X, W, b
|
||||||
|
// Output: Y
|
||||||
|
INPUT_TAGS(INPUT, FILTER, BIAS);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class CudnnConvTransposeGradientOp final : public CudnnConvTransposeOpBase {
|
||||||
|
public:
|
||||||
|
CudnnConvTransposeGradientOp(const OperatorDef& operator_def, Workspace* ws)
|
||||||
|
: CudnnConvTransposeOpBase(operator_def, ws) {}
|
||||||
|
|
||||||
|
~CudnnConvTransposeGradientOp() {}
|
||||||
|
|
||||||
|
bool RunOnDevice() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
cudnnConvolutionFwdAlgo_t algo_;
|
||||||
|
cudnnConvolutionBwdFilterAlgo_t bwd_filter_algo_;
|
||||||
|
// input: X, W, dY
|
||||||
|
// output: dW, db, and optionally dX
|
||||||
|
INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
|
||||||
|
OUTPUT_TAGS(FILTER_GRAD, BIAS_GRAD, INPUT_GRAD);
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implementations
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool CudnnConvTransposeOp<T>::RunOnDevice() {
|
||||||
|
auto& X = Input(INPUT);
|
||||||
|
auto& filter = Input(FILTER);
|
||||||
|
auto& bias = Input(BIAS);
|
||||||
|
auto* Y = Output(0);
|
||||||
|
int C = 0;
|
||||||
|
switch (order_) {
|
||||||
|
case StorageOrder::NHWC:
|
||||||
|
C = filter.dim32(3);
|
||||||
|
break;
|
||||||
|
case StorageOrder::NCHW:
|
||||||
|
C = filter.dim32(1);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||||
|
}
|
||||||
|
ConvTransposeUnpoolBase<CUDAContext>::SetOutputSize(X, Y, C);
|
||||||
|
|
||||||
|
int N = 0, M = 0, H = 0, W = 0, H_out = 0, W_out = 0;
|
||||||
|
switch (order_) {
|
||||||
|
case StorageOrder::NHWC:
|
||||||
|
N = X.dim32(0);
|
||||||
|
H = X.dim32(1);
|
||||||
|
W = X.dim32(2);
|
||||||
|
M = X.dim32(3);
|
||||||
|
H_out = Y->dim32(1);
|
||||||
|
W_out = Y->dim32(2);
|
||||||
|
DCHECK_EQ(filter.dim32(1), kernel_h_);
|
||||||
|
DCHECK_EQ(filter.dim32(1), kernel_h_);
|
||||||
|
DCHECK_EQ(filter.dim32(2), kernel_w_);
|
||||||
|
DCHECK_EQ(filter.dim32(3), C);
|
||||||
|
break;
|
||||||
|
case StorageOrder::NCHW:
|
||||||
|
N = X.dim32(0);
|
||||||
|
M = X.dim32(1);
|
||||||
|
H = X.dim32(2);
|
||||||
|
W = X.dim32(3);
|
||||||
|
H_out = Y->dim32(2);
|
||||||
|
W_out = Y->dim32(3);
|
||||||
|
DCHECK_EQ(filter.dim32(1), C);
|
||||||
|
DCHECK_EQ(filter.dim32(2), kernel_h_);
|
||||||
|
DCHECK_EQ(filter.dim32(3), kernel_w_);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||||
|
}
|
||||||
|
|
||||||
|
DCHECK_EQ(bias.ndim(), 1);
|
||||||
|
DCHECK_EQ(bias.dim32(0), C);
|
||||||
|
|
||||||
|
// Set up the cudnn algorithms & workspace if necessary
|
||||||
|
bool input_changed = (X.dims() != cudnn_input_dims_);
|
||||||
|
bool filter_changed = (filter.dims() != cudnn_filter_dims_);
|
||||||
|
|
||||||
|
if (input_changed || filter_changed) {
|
||||||
|
VLOG(1) << "Changing the cudnn descriptor configurations.";
|
||||||
|
if (input_changed) {
|
||||||
|
cudnn_input_dims_ = X.dims();
|
||||||
|
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||||
|
bottom_desc_,
|
||||||
|
GetCudnnTensorFormat(order_),
|
||||||
|
cudnnTypeWrapper<T>::type,
|
||||||
|
N,
|
||||||
|
M,
|
||||||
|
H,
|
||||||
|
W));
|
||||||
|
}
|
||||||
|
if (filter_changed) {
|
||||||
|
cudnn_filter_dims_ = filter.dims();
|
||||||
|
CUDNN_CHECK(cudnnSetFilter4dDescriptor(
|
||||||
|
filter_desc_,
|
||||||
|
cudnnTypeWrapper<T>::type,
|
||||||
|
GetCudnnTensorFormat(order_),
|
||||||
|
M,
|
||||||
|
C,
|
||||||
|
kernel_h_,
|
||||||
|
kernel_w_));
|
||||||
|
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||||
|
bias_desc_,
|
||||||
|
GetCudnnTensorFormat(order_),
|
||||||
|
cudnnTypeWrapper<T>::type,
|
||||||
|
1,
|
||||||
|
C,
|
||||||
|
1,
|
||||||
|
1));
|
||||||
|
}
|
||||||
|
// Set the output
|
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(
        top_desc_,
        GetCudnnTensorFormat(order_),
        cudnnTypeWrapper<T>::type,
        N,
        C,
        H_out,
        W_out));
    // Set the convolution descriptor
    CHECK_EQ(pad_t_, pad_b_)
        << "The current padding scheme leads to unequal padding on the top and "
           "bottom, which is not supported by cudnn.";
    CHECK_EQ(pad_l_, pad_r_)
        << "The current padding scheme leads to unequal padding on the left "
           "and right, which is not supported by cudnn.";
    CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
        conv_desc_,
        pad_t_,
        pad_l_,
        stride_h_,
        stride_w_,
        1,
        1,
        CUDNN_CROSS_CORRELATION));
    if (deterministic_) {
      bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
    } else if (exhaustive_search_) {
      int returned_algo_count;
      std::array<cudnnConvolutionBwdDataAlgoPerf_t, kNUM_CUDNN_BWD_DATA_ALGS>
          data_perf_stat;
      cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
        state->workspace().reset();
        CUDNN_CHECK(cudnnFindConvolutionBackwardDataAlgorithm(
            state->cudnn_handle(),
            filter_desc_,
            bottom_desc_,
            conv_desc_,
            top_desc_,
            kNUM_CUDNN_BWD_DATA_ALGS,
            &returned_algo_count,
            data_perf_stat.data()));
      });

      LogCuDNNPerfStats(data_perf_stat, returned_algo_count);
      bwd_data_algo_ = data_perf_stat[0].algo;
    } else {
      CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(
          cudnn_wrapper_.inline_cudnn_handle(),
          filter_desc_,
          bottom_desc_,
          conv_desc_,
          top_desc_,
          CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
          cudnn_ws_nbytes_limit_,
          &bwd_data_algo_));
    }

    size_t bwd_data_ws_size;
    CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(
        cudnn_wrapper_.inline_cudnn_handle(),
        filter_desc_,
        bottom_desc_,
        conv_desc_,
        top_desc_,
        bwd_data_algo_,
        &bwd_data_ws_size));
    cudnn_ws_nbytes_ = bwd_data_ws_size;
    LOG(INFO) << "CuDNN algorithm: " << bwd_data_algo_;
    LOG(INFO) << "CuDNN workspace size: " << bwd_data_ws_size;
  }

  // Now, actually run the computation. The "forward" pass of ConvTranspose
  // maps onto cudnn's backward-data convolution kernel.
  cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
    CUDNN_CHECK(cudnnConvolutionBackwardData(
        state->cudnn_handle(),
        cudnnTypeWrapper<T>::kOne(),
        filter_desc_,
        filter.template data<T>(),
        bottom_desc_,
        X.template data<T>(),
        conv_desc_,
        bwd_data_algo_,
        state->workspace().get(cudnn_ws_nbytes_),
        cudnn_ws_nbytes_,
        cudnnTypeWrapper<T>::kZero(),
        top_desc_,
        Y->template mutable_data<T>()));
  });
  // Bias
  CUDNN_CHECK(cudnnAddTensor(
      cudnn_wrapper_.inline_cudnn_handle(),
      cudnnTypeWrapper<T>::kOne(),
      bias_desc_,
      bias.template data<T>(),
      cudnnTypeWrapper<T>::kOne(),
      top_desc_,
      Y->template mutable_data<T>()));
  // Done.
  return true;
}

// TODO(Yangqing): a lot of the function contents are very similar. Consider
// consolidating them.
template <typename T>
bool CudnnConvTransposeGradientOp<T>::RunOnDevice() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto& dY = Input(OUTPUT_GRAD);
  auto* dfilter = Output(FILTER_GRAD);
  auto* dbias = Output(BIAS_GRAD);
  DCHECK_EQ(X.ndim(), 4);
  DCHECK_EQ(filter.ndim(), 4);
  auto* Y = Output(0);
  int C = 0;
  switch (order_) {
    case StorageOrder::NHWC:
      C = filter.dim32(3);
      break;
    case StorageOrder::NCHW:
      C = filter.dim32(1);
      break;
    default:
      LOG(FATAL) << "Unknown storage order: " << order_;
  }
  ConvTransposeUnpoolBase<CUDAContext>::SetOutputSize(X, Y, C);

  int N = 0, M = 0, H = 0, W = 0, H_out = 0, W_out = 0;
  switch (order_) {
    case StorageOrder::NHWC:
      N = X.dim32(0);
      H = X.dim32(1);
      W = X.dim32(2);
      M = X.dim32(3);
      H_out = dY.dim32(1);
      W_out = dY.dim32(2);
      DCHECK_EQ(filter.dim32(1), kernel_h_);
      DCHECK_EQ(filter.dim32(2), kernel_w_);
      DCHECK_EQ(filter.dim32(3), C);
      break;
    case StorageOrder::NCHW:
      N = X.dim32(0);
      M = X.dim32(1);
      H = X.dim32(2);
      W = X.dim32(3);
      H_out = dY.dim32(2);
      W_out = dY.dim32(3);
      DCHECK_EQ(filter.dim32(1), C);
      DCHECK_EQ(filter.dim32(2), kernel_h_);
      DCHECK_EQ(filter.dim32(3), kernel_w_);
      break;
    default:
      LOG(FATAL) << "Unknown storage order: " << order_;
  }
  // Since we only handle LegacyPadding::NOTSET, we don't need to
  // compute padding.
  dfilter->ResizeLike(filter);
  dbias->Resize(C);

  // Set up the cudnn algorithms & workspace if necessary
  bool input_changed = (X.dims() != cudnn_input_dims_);
  bool filter_changed = (filter.dims() != cudnn_filter_dims_);
  if (input_changed || filter_changed) {
    VLOG(1) << "Changing the cudnn descriptor configurations.";
    if (input_changed) {
      cudnn_input_dims_ = X.dims();
      CUDNN_CHECK(cudnnSetTensor4dDescriptor(
          bottom_desc_,
          GetCudnnTensorFormat(order_),
          cudnnTypeWrapper<T>::type,
          N,
          M,
          H,
          W));
    }
    if (filter_changed) {
      cudnn_filter_dims_ = filter.dims();
      CUDNN_CHECK(cudnnSetFilter4dDescriptor(
          filter_desc_,
          cudnnTypeWrapper<T>::type,
          GetCudnnTensorFormat(order_),
          M,
          C,
          kernel_h_,
          kernel_w_));
      CUDNN_CHECK(cudnnSetTensor4dDescriptor(
          bias_desc_,
          GetCudnnTensorFormat(order_),
          cudnnTypeWrapper<T>::type,
          1,
          C,
          1,
          1));
    }
    // Set the output
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(
        top_desc_,
        GetCudnnTensorFormat(order_),
        cudnnTypeWrapper<T>::type,
        N,
        C,
        H_out,
        W_out));
    // Set the convolution descriptor
    CHECK_EQ(pad_t_, pad_b_)
        << "The current padding scheme leads to unequal padding on the top and "
           "bottom, which is not supported by cudnn.";
    CHECK_EQ(pad_l_, pad_r_)
        << "The current padding scheme leads to unequal padding on the left "
           "and right, which is not supported by cudnn.";
    CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
        conv_desc_,
        pad_t_,
        pad_l_,
        stride_h_,
        stride_w_,
        1,
        1,
        CUDNN_CROSS_CORRELATION));
    // Set the workspace

    size_t bwd_filter_ws_size, fwd_ws_size;

    if (deterministic_) {
      algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
      bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
    } else if (exhaustive_search_) {
      LOG(INFO) << "CUDNN Convolution bwd: doing exhaustive search.";
      // When we do an exhaustive search, we will ignore the workspace size
      // limit and simply go for the fastest algorithm. If you happen to run
      // out of memory later, you will be on your own...
      int returned_algo_count;
      // We clean up the current workspace memory so that the algorithm
      // search is free to allocate memory.
      // Actually run the search.
      std::array<cudnnConvolutionBwdFilterAlgoPerf_t, kNUM_CUDNN_BWD_FILTER_ALGS>
          filter_perf_stat;

      cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
        state->workspace().reset();
        CUDNN_CHECK(cudnnFindConvolutionBackwardFilterAlgorithm(
            state->cudnn_handle(),
            top_desc_,
            bottom_desc_,
            conv_desc_,
            filter_desc_,
            kNUM_CUDNN_BWD_FILTER_ALGS,
            &returned_algo_count,
            filter_perf_stat.data()));
      });
      LogCuDNNPerfStats(filter_perf_stat, returned_algo_count);
      bwd_filter_algo_ = filter_perf_stat[0].algo;

      std::array<cudnnConvolutionFwdAlgoPerf_t, kNUM_CUDNN_FWD_ALGS>
          fwd_perf_stat;
      cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
        state->workspace().reset();
        CUDNN_CHECK(cudnnFindConvolutionForwardAlgorithm(
            state->cudnn_handle(),
            top_desc_,
            filter_desc_,
            conv_desc_,
            bottom_desc_,
            kNUM_CUDNN_FWD_ALGS,
            &returned_algo_count,
            fwd_perf_stat.data()));
      });

      LogCuDNNPerfStats(fwd_perf_stat, returned_algo_count);
      algo_ = fwd_perf_stat[0].algo;
    } else {
      // choose backward algorithm for the filter gradient
      CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(
          cudnn_wrapper_.inline_cudnn_handle(),
          top_desc_,
          bottom_desc_,
          conv_desc_,
          filter_desc_,
          CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
          cudnn_ws_nbytes_limit_,
          &bwd_filter_algo_));
      // choose a forward algorithm, used below to compute the input gradient
      CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
          cudnn_wrapper_.inline_cudnn_handle(),
          top_desc_,
          filter_desc_,
          conv_desc_,
          bottom_desc_,
          CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
          cudnn_ws_nbytes_limit_,
          &algo_));
    }
    // get workspace size for the backward filter algorithm
    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(
        cudnn_wrapper_.inline_cudnn_handle(),
        top_desc_,
        bottom_desc_,
        conv_desc_,
        filter_desc_,
        bwd_filter_algo_,
        &bwd_filter_ws_size));
    // get workspace size for the forward algorithm
    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
        cudnn_wrapper_.inline_cudnn_handle(),
        top_desc_,
        filter_desc_,
        conv_desc_,
        bottom_desc_,
        algo_,
        &fwd_ws_size));
    cudnn_ws_nbytes_ = std::max(bwd_filter_ws_size, fwd_ws_size);

    LOG(INFO) << "CuDNN bwd algorithm: " << bwd_filter_algo_ << ", " << algo_;
    LOG(INFO) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
  }

  // Now, actually run the computation.
  CUDNN_CHECK(cudnnConvolutionBackwardBias(
      cudnn_wrapper_.inline_cudnn_handle(),
      cudnnTypeWrapper<T>::kOne(),
      top_desc_,
      dY.template data<T>(),
      cudnnTypeWrapper<T>::kZero(),
      bias_desc_,
      dbias->template mutable_data<T>()));

  cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
    CUDNN_CHECK(cudnnConvolutionBackwardFilter(
        state->cudnn_handle(),
        cudnnTypeWrapper<T>::kOne(),
        top_desc_,
        dY.template data<T>(),
        bottom_desc_,
        X.template data<T>(),
        conv_desc_,
        bwd_filter_algo_,
        state->workspace().get(cudnn_ws_nbytes_),
        cudnn_ws_nbytes_,
        cudnnTypeWrapper<T>::kZero(),
        filter_desc_,
        dfilter->template mutable_data<T>()));
    if (OutputSize() == 3) {
      // Compute the gradient w.r.t. the input. For ConvTranspose this is a
      // regular forward convolution of dY with the filter.
      auto* dX = Output(INPUT_GRAD);
      dX->ResizeLike(X);
      CUDNN_CHECK(cudnnConvolutionForward(
          state->cudnn_handle(),
          cudnnTypeWrapper<T>::kOne(),
          top_desc_,
          dY.template data<T>(),
          filter_desc_,
          filter.template data<T>(),
          conv_desc_,
          algo_,
          state->workspace().get(cudnn_ws_nbytes_),
          cudnn_ws_nbytes_,
          cudnnTypeWrapper<T>::kZero(),
          bottom_desc_,
          dX->template mutable_data<T>()));
    }
  });
  return true;
}

REGISTER_CUDNN_OPERATOR(ConvTranspose, CudnnConvTransposeOp<float>);
REGISTER_CUDNN_OPERATOR(
    ConvTransposeGradient,
    CudnnConvTransposeGradientOp<float>);

}  // namespace caffe2
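
A note on the kernel pairing in the two ops above (our annotation, not part of the commit): the calls can look inverted, since the gradient op invokes cudnnConvolutionForward. Writing each output tap of ConvTranspose as multiplication by the transposed filter matrix makes the mapping clear:

y = W^{\top} x
\quad\Rightarrow\quad
\frac{\partial \ell}{\partial x} = W\,\frac{\partial \ell}{\partial y},
\qquad
\frac{\partial \ell}{\partial W} = f\!\left(x, \frac{\partial \ell}{\partial y}\right),

so the forward pass maps onto cudnnConvolutionBackwardData, the input gradient onto cudnnConvolutionForward, and the filter gradient onto cudnnConvolutionBackwardFilter, which is exactly the set of calls made above.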

@@ -118,9 +118,13 @@ class ConvTransposeUnpoolBase : public Operator<Context> {
    return true;
  }

-  virtual bool RunOnDeviceWithOrderNCHW() = 0;
+  virtual bool RunOnDeviceWithOrderNCHW() {
+    CAFFE_THROW("Not implemented");
+  }

-  virtual bool RunOnDeviceWithOrderNHWC() = 0;
+  virtual bool RunOnDeviceWithOrderNHWC() {
+    CAFFE_THROW("Not implemented");
+  }

  virtual ~ConvTransposeUnpoolBase() {}

46  caffe2/operators/counter_ops.cc  Normal file

@@ -0,0 +1,46 @@
#include "counter_ops.h"

namespace caffe2 {
namespace {

REGISTER_CPU_OPERATOR(CreateCounter, CreateCounterOp<int32_t>);
REGISTER_CPU_OPERATOR(ResetCounter, ResetCounterOp<int32_t>);
REGISTER_CPU_OPERATOR(CountDown, CountDownOp<int32_t>);

OPERATOR_SCHEMA(CreateCounter)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Creates a count-down counter with initial value specified by the 'init_count'
argument.
)DOC")
    .Output(0, "counter", "A blob pointing to an instance of a new counter.")
    .Arg("init_count", "Initial count for the counter; must be >= 0.");

OPERATOR_SCHEMA(ResetCounter)
    .NumInputs(1)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Resets a count-down counter to the value specified by the 'init_count'
argument.
)DOC")
    .Input(0, "counter", "A blob pointing to an instance of an existing counter.")
    .Arg("init_count", "Resets counter to this value; must be >= 0.");

OPERATOR_SCHEMA(CountDown)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
If the internal count value > 0, decreases the count value by 1 and outputs
false; otherwise outputs true.
)DOC")
    .Input(0, "counter", "A blob pointing to an instance of a counter.")
    .Output(0, "should_stop", "false unless the internal count is zero.");

SHOULD_NOT_DO_GRADIENT(CreateCounter);
SHOULD_NOT_DO_GRADIENT(ResetCounter);
SHOULD_NOT_DO_GRADIENT(CountDown);

}  // namespace

}  // namespace caffe2
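
A rough end-to-end sketch of the three operators (our illustration, not part of the commit). It assumes the standard caffe2 Workspace and CreateOperator API from caffe2/core; the blob names and the TensorCPU read at the end are illustrative:

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

void CounterLoopSketch() {
  caffe2::Workspace ws;
  // CreateCounter: no inputs, one output blob holding the counter instance.
  caffe2::OperatorDef create_def;
  create_def.set_type("CreateCounter");
  create_def.add_output("counter");
  auto* init = create_def.add_arg();
  init->set_name("init_count");
  init->set_i(3);
  caffe2::CreateOperator(create_def, &ws)->Run();

  // CountDown: consumes the counter, produces a scalar bool "should_stop".
  caffe2::OperatorDef count_def;
  count_def.set_type("CountDown");
  count_def.add_input("counter");
  count_def.add_output("should_stop");
  auto count_op = caffe2::CreateOperator(count_def, &ws);
  // Expected: three iterations with should_stop == false, then true.
  while (true) {
    count_op->Run();
    const auto& stop = ws.GetBlob("should_stop")->Get<caffe2::TensorCPU>();
    if (*stop.data<bool>()) {
      break;
    }
  }
}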

89  caffe2/operators/counter_ops.h  Normal file

@@ -0,0 +1,89 @@
#ifndef CAFFE2_OPERATORS_COUNTER_OPS_H
#define CAFFE2_OPERATORS_COUNTER_OPS_H

#include <atomic>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"

namespace caffe2 {
namespace {
template <typename T>
class Counter {
 public:
  explicit Counter(T count) : count_(count) {}
  bool CountDown() {
    // Decrement-and-test in a single atomic operation, so that two threads
    // racing at count_ == 1 cannot both observe a positive count.
    if (count_-- > 0) {
      return false;
    }
    return true;
  }

  void reset(T init_count) {
    count_ = init_count;
  }

 private:
  std::atomic<T> count_;
};
}  // namespace

template <typename T, class Context = CPUContext>
class CreateCounterOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  CreateCounterOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        init_count_(OperatorBase::GetSingleArgument<T>("init_count", 0)) {
    CHECK_LE(0, init_count_) << "negative init_count is not permitted.";
  }

  bool RunOnDevice() override {
    *OperatorBase::Output<std::unique_ptr<Counter<T>>>(0) =
        std::unique_ptr<Counter<T>>(new Counter<T>(init_count_));
    return true;
  }

 private:
  T init_count_ = 0;
};

template <typename T, class Context = CPUContext>
class ResetCounterOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  ResetCounterOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        init_count_(OperatorBase::GetSingleArgument<T>("init_count", 0)) {
    CHECK_LE(0, init_count_) << "negative init_count is not permitted.";
  }

  bool RunOnDevice() override {
    auto& counterPtr = OperatorBase::Input<std::unique_ptr<Counter<T>>>(0);
    counterPtr->reset(init_count_);
    return true;
  }

 private:
  T init_count_;
};

template <typename T, class Context = CPUContext>
class CountDownOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  CountDownOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& counterPtr = OperatorBase::Input<std::unique_ptr<Counter<T>>>(0);
    auto* output = Output(0);
    output->Resize(std::vector<int>{});
    *output->template mutable_data<bool>() = counterPtr->CountDown();
    return true;
  }
};
}  // namespace caffe2
#endif  // CAFFE2_OPERATORS_COUNTER_OPS_H

@@ -2,6 +2,17 @@

namespace caffe2 {

namespace {

inline float sigmoid_xent_forward(float lgt, float tgt) {
  return lgt * (tgt - (lgt >= 0)) - log(1 + exp(lgt - 2 * lgt * (lgt >= 0)));
}

inline float sigmoid_xent_backward(float lgt, float tgt) {
  return tgt - 1. / (1. + exp(-lgt));
}
}  // namespace
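
A note on the helpers above (our annotation, not part of the commit): sigmoid_xent_forward is the overflow-safe form of the per-element log-likelihood for logit x and target t, and sigmoid_xent_backward is its derivative. With the indicator [x >= 0]:

f(x, t) = t\,x - \log\bigl(1 + e^{x}\bigr)
        = x\bigl(t - [x \ge 0]\bigr) - \log\bigl(1 + e^{\,x - 2x[x \ge 0]}\bigr),
\qquad
\frac{\partial f}{\partial x} = t - \sigma(x), \quad \sigma(x) = \frac{1}{1 + e^{-x}}.

The rewritten exponent x - 2x[x >= 0] is never positive, so exp cannot overflow; the ops below average -f over the inner dimension, matching out_ptr[i] = -value / inner_size.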

template <>
bool LabelCrossEntropyOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);

@@ -26,6 +37,68 @@ bool LabelCrossEntropyOp<float, CPUContext>::RunOnDevice() {
  return true;
}

template <>
bool SigmoidCrossEntropyWithLogitsOp<float, CPUContext>::RunOnDevice() {
  auto& logits = Input(0);
  auto& targets = Input(1);
  CAFFE_ENFORCE(logits.dims() == targets.dims());
  const auto inner_size = logits.ndim() > 0 ? logits.dims().back() : 1;
  const auto outer_size = logits.size() / inner_size;

  auto* out = Output(0);
  if (logits.ndim() == 0) {
    out->Resize(std::vector<TIndex>{});
  } else {
    std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
    out->Resize(dims);
  }
  auto* out_ptr = out->mutable_data<float>();

  auto* logits_ptr = logits.data<float>();
  auto* targets_ptr = targets.data<float>();

  auto in_idx = 0;
  for (int i = 0; i < outer_size; ++i) {
    float value = 0;
    for (int j = 0; j < inner_size; ++j) {
      value += sigmoid_xent_forward(logits_ptr[in_idx], targets_ptr[in_idx]);
      ++in_idx;
    }
    out_ptr[i] = -value / inner_size;
  }
  return true;
}

template <>
bool SigmoidCrossEntropyWithLogitsGradientOp<float, CPUContext>::RunOnDevice() {
  auto& g = Input(0);
  auto& logits = Input(1);
  auto& targets = Input(2);
  CAFFE_ENFORCE(logits.dims() == targets.dims());
  const auto inner_size = logits.ndim() > 0 ? logits.dims().back() : 1;
  const auto outer_size = logits.size() / inner_size;
  CAFFE_ENFORCE(g.size() == outer_size);

  auto* out = Output(0);
  out->ResizeLike(logits);
  auto* out_ptr = out->mutable_data<float>();

  auto* logits_ptr = logits.data<float>();
  auto* targets_ptr = targets.data<float>();
  auto* g_ptr = g.data<float>();

  auto in_idx = 0;
  for (int i = 0; i < outer_size; ++i) {
    auto g_factor = -g_ptr[i] / inner_size;
    // Distinct inner index so the outer loop variable is not shadowed.
    for (int j = 0; j < inner_size; ++j) {
      out_ptr[in_idx] = g_factor *
          sigmoid_xent_backward(logits_ptr[in_idx], targets_ptr[in_idx]);
      ++in_idx;
    }
  }
  return true;
}

template <>
bool LabelCrossEntropyGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);

@@ -129,6 +202,13 @@ REGISTER_CPU_OPERATOR(MakeTwoClass,
REGISTER_CPU_OPERATOR(MakeTwoClassGradient,
                      MakeTwoClassGradientOp<float, CPUContext>);

REGISTER_CPU_OPERATOR(
    SigmoidCrossEntropyWithLogits,
    SigmoidCrossEntropyWithLogitsOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    SigmoidCrossEntropyWithLogitsGradient,
    SigmoidCrossEntropyWithLogitsGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(MakeTwoClass)
    .NumInputs(1)
    .NumOutputs(1)

@@ -145,6 +225,22 @@ OPERATOR_SCHEMA(MakeTwoClassGradient)
    .NumInputs(1)
    .NumOutputs(1);

OPERATOR_SCHEMA(SigmoidCrossEntropyWithLogits)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given two matrices logits and targets, of the same shape
(batch_size, num_classes), computes the sigmoid cross entropy between the two.
Returns a tensor of shape (batch_size,) of losses for each example.
)DOC")
    .Input(0, "logits", "matrix of logits for each example and class.")
    .Input(1, "targets", "matrix of targets, same shape as logits.")
    .Output(0, "xentropy", "Vector with the total xentropy for each example.");

OPERATOR_SCHEMA(SigmoidCrossEntropyWithLogitsGradient)
    .NumInputs(3)
    .NumOutputs(1);

struct GetMakeTwoClassGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {

@@ -156,5 +252,20 @@ struct GetMakeTwoClassGradient : public GradientMakerBase {
  }
};
REGISTER_GRADIENT(MakeTwoClass, GetMakeTwoClassGradient);

struct GetSigmoidCrossEntropyWithLogitsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SigmoidCrossEntropyWithLogitsGradient",
        "",
        vector<string>{GO(0), I(0), I(1)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(
    SigmoidCrossEntropyWithLogits,
    GetSigmoidCrossEntropyWithLogitsGradient);

}  // namespace
}  // namespace caffe2

@@ -62,6 +62,22 @@ class MakeTwoClassGradientOp final
  // Output: dX
};

template <typename T, class Context>
class SigmoidCrossEntropyWithLogitsOp final : public Operator<Context> {
 public:
  USE_SIMPLE_CTOR_DTOR(SigmoidCrossEntropyWithLogitsOp);
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  bool RunOnDevice() override;
};

template <typename T, class Context>
class SigmoidCrossEntropyWithLogitsGradientOp final : public Operator<Context> {
 public:
  USE_SIMPLE_CTOR_DTOR(SigmoidCrossEntropyWithLogitsGradientOp);
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  bool RunOnDevice() override;
};

}  // namespace caffe2

#endif  // CAFFE2_OPERATORS_CROSS_ENTROPY_OP_H_

734  caffe2/operators/dataset_ops.cc  Normal file

@@ -0,0 +1,734 @@
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/string_utils.h"

namespace caffe2 {
namespace {

const char kDatasetFieldSeparator = ':';
const char* kDatasetLengthField = "lengths";

// by what percentage to grow the dataset when needed
const int kDatasetGrowthPct = 40;

// used for lengths tensors in the dataset
using TLength = int32_t;
// used for all internal dataset operations (offsets, sizes to read, etc.)
using TOffset = int64_t;

/**
 * Provides functionality to iterate across a list of tensors where some
 * of those tensors represent lengths in a hierarchical structure.
 */
class TreeIterator {
 public:
  struct FieldDesc {
    int id;
    int lengthFieldId = -1;
    std::string name;
  };

  explicit TreeIterator(const std::vector<std::string>& fields) {
    // populate field vector and split field names
    fields_.resize(fields.size());
    std::vector<std::vector<std::string>> nameParts(fields_.size());
    for (int i = 0; i < fields.size(); ++i) {
      auto& field = fields_.at(i);
      field.name = fields[i];
      field.id = i;
      field.lengthFieldId = -1;
      nameParts.at(i) = split(kDatasetFieldSeparator, field.name);
    }

    // populate lengthFields
    for (const auto& field : fields_) {
      const auto& parts = nameParts.at(field.id);
      if (!parts.empty() && parts.back() == kDatasetLengthField) {
        lengthFieldIds_.push_back(field.id);
      }
    }

    // find length-field with maximum prefix matching for each field
    for (auto& field : fields_) {
      // by default, we are matching against the root domain
      int maxMatchLevel = 1;
      int maxMatchLengthFieldId = -1;
      for (int j = 0; j < numLengthFields(); ++j) {
        const auto& lenField = lengthField(j);
        // a length field can't have itself as its length field
        if (field.id == lenField.id) {
          continue;
        }
        auto lf = nameParts.at(lenField.id);
        auto lfEnd = lf.end() - 1;
        // check whether this lengthField is a prefix for this field name
        if (std::mismatch(lf.begin(), lfEnd, nameParts.at(field.id).begin())
                .first != lfEnd) {
          continue;
        }
        if (lf.size() > maxMatchLevel) {
          maxMatchLevel = lf.size();
          maxMatchLengthFieldId = j;
        }
      }
      field.lengthFieldId = maxMatchLengthFieldId;
    }

    // check that fields are topologically sorted
    // (no length field depends on a length defined afterwards)
    for (const auto& field : fields_) {
      const auto* lengthField = lengthFieldFor(field);
      CAFFE_ENFORCE(
          (lengthField == nullptr) || (lengthField->id < field.id),
          "Error: Field ",
          field.id,
          " (",
          field.name,
          ") ",
          "depends on a field defined afterwards: ",
          lengthField->id,
          " (",
          lengthField->name,
          ").");
    }
  }

  void advance(
      const std::vector<const TLength*>& lengths,
      std::vector<TOffset>& offsets,
      std::vector<TOffset>& sizes,
      std::vector<TOffset>& limits,
      TOffset num) {
    thread_local std::vector<TOffset> newOffsets;
    CHECK_EQ(lengths.size(), numLengthFields());
    CHECK_EQ(offsets.size(), numOffsetFields());
    sizes.resize(offsets.size());
    newOffsets.resize(offsets.size());
    // first index, top level
    {
      auto limit = limits[0];
      auto offset = offsets[0];
      CAFFE_ENFORCE(limit >= offset, "Tried to advance past end of cursor.");
      TOffset total = std::min(limit - offset, num);
      sizes[0] = total;
      newOffsets[0] = offset + total;
    }
    // child indices
    for (int j = 1; j < numOffsetFields(); ++j) {
      TOffset total = 0;
      int parentOffsetId = offsetFieldIdFor(lengthField(j - 1));
      const TLength* length = lengths[j - 1] + offsets[parentOffsetId];
      for (int k = 0; k < sizes[parentOffsetId]; ++k) {
        total += *(length++);
      }
      auto offset = offsets[j];
      CAFFE_ENFORCE(
          offset + total <= limits[j],
          "Inconsistent field length: ",
          "tried to advance past the end of field ",
          j);
      sizes[j] = total;
      newOffsets[j] = offset + total;
    }
    offsets = newOffsets;
  }

  // Corresponds to the number of fields that have "lengths" as their last name
  int numLengthFields() const {
    return lengthFieldIds_.size();
  }

  // Corresponds to the number of length fields + 1 (for the top-level domain)
  int numOffsetFields() const {
    return numLengthFields() + 1;
  }

  // Get lengthField description for the given field
  const FieldDesc* lengthFieldFor(const FieldDesc& desc) {
    return (desc.lengthFieldId == -1)
        ? nullptr
        : &fields_.at(lengthFieldIds_.at(desc.lengthFieldId));
  }

  // Get lengthField description for the given lengthFieldId, where
  // 0 <= lengthFieldId < numLengthFields()
  const FieldDesc& lengthField(int lengthFieldId) {
    return fields_.at(lengthFieldIds_.at(lengthFieldId));
  }

  // Returns the index into the 'offset' vector for the given field.
  int offsetFieldIdFor(const FieldDesc& fieldDesc) {
    return fieldDesc.lengthFieldId + 1;
  }

  // Returns the field description for all fields.
  const std::vector<FieldDesc>& fields() {
    return fields_;
  }

 private:
  // Description of each field
  std::vector<FieldDesc> fields_;
  // Index into fields_ above for the fields that are lengths.
  std::vector<int> lengthFieldIds_;
};

class TreeCursor {
 public:
  explicit TreeCursor(const TreeIterator& iterator) : it(iterator) {}
  std::vector<TOffset> offsets;
  std::mutex mutex_;
  TreeIterator it;
};

class CreateTreeCursorOp : public Operator<CPUContext> {
 public:
  CreateTreeCursorOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        fields_(OperatorBase::GetRepeatedArgument<std::string>("fields")) {}

  bool RunOnDevice() override {
    *OperatorBase::Output<std::unique_ptr<TreeCursor>>(0) =
        std::unique_ptr<TreeCursor>(new TreeCursor(TreeIterator(fields_)));
    return true;
  }

 private:
  std::vector<std::string> fields_;
};

class ResetCursorOp : public Operator<CPUContext> {
 public:
  ResetCursorOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    std::lock_guard<std::mutex> lock(cursor->mutex_);
    cursor->offsets.clear();
    return true;
  }
};

class CheckDatasetConsistencyOp : public Operator<CPUContext> {
 public:
  CheckDatasetConsistencyOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        iterator_(OperatorBase::GetRepeatedArgument<std::string>("fields")) {}

  bool RunOnDevice() override {
    thread_local std::vector<const TLength*> lengths;
    thread_local std::vector<TOffset> limits;
    thread_local std::vector<TOffset> sizes;
    thread_local std::vector<TOffset> offsets;
    CAFFE_ENFORCE(
        InputSize() == iterator_.fields().size(),
        "Invalid number of fields. Expected ",
        iterator_.fields().size(),
        ", got ",
        InputSize());
    sizes.resize(iterator_.numOffsetFields());
    // gather length data
    lengths.resize(iterator_.numLengthFields());
    for (int i = 0; i < lengths.size(); ++i) {
      lengths[i] = Input(iterator_.lengthField(i).id).data<TLength>();
    }
    // gather size limits
    limits.assign(sizes.size(), std::numeric_limits<TOffset>::max());
    for (int i = 0; i < iterator_.fields().size(); ++i) {
      int lengthIdx = iterator_.fields()[i].lengthFieldId + 1;
      TOffset size = (TOffset)Input(i).dims()[0];
      if (limits[lengthIdx] == std::numeric_limits<TOffset>::max()) {
        limits[lengthIdx] = size;
      } else {
        CAFFE_ENFORCE(
            limits[lengthIdx] == size,
            "Inconsistent sizes for fields belonging to same domain.",
            " Field: ",
            i,
            " (",
            iterator_.fields()[i].name,
            "); Length field index: ",
            lengthIdx,
            "; Previous size: ",
            limits[lengthIdx],
            "; New size: ",
            size);
      }
    }
    // advance to the end
    offsets.assign(sizes.size(), 0);
    iterator_.advance(lengths, offsets, sizes, limits, limits[0]);
    for (int i = 0; i < limits.size(); ++i) {
      CAFFE_ENFORCE(limits[i] == offsets[i]);
    }
    return true;
  }

 private:
  TreeIterator iterator_;
};

class ReadNextBatchOp : public Operator<CPUContext> {
 public:
  ReadNextBatchOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        batchSize_(OperatorBase::GetSingleArgument<int>("batch_size", 1)) {}

  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    CAFFE_ENFORCE(InputSize() == cursor->it.fields().size() + 1);
    thread_local std::vector<const TLength*> lengths;
    thread_local std::vector<TOffset> limits;
    thread_local std::vector<TOffset> sizes;
    thread_local std::vector<TOffset> offsets;
    sizes.resize(cursor->it.numOffsetFields());
    // gather length data
    lengths.resize(cursor->it.numLengthFields());
    for (int i = 0; i < lengths.size(); ++i) {
      lengths[i] = Input(cursor->it.lengthField(i).id + 1).data<int>();
    }
    // gather size limits
    limits.assign(sizes.size(), std::numeric_limits<TOffset>::max());
    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      int lengthFieldIdx = cursor->it.fields()[i].lengthFieldId + 1;
      limits[lengthFieldIdx] =
          std::min(limits[lengthFieldIdx], (TOffset)Input(i + 1).dims()[0]);
    }
    // advance cursor
    {
      std::lock_guard<std::mutex> lock(cursor->mutex_);
      if (cursor->offsets.empty()) {
        cursor->offsets.assign(sizes.size(), 0);
      }
      offsets = cursor->offsets;
      cursor->it.advance(lengths, cursor->offsets, sizes, limits, batchSize_);
    }
    // gather data
    thread_local std::vector<TIndex> outDim;
    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1;
      auto size = sizes[lengthIdx];
      auto offset = offsets[lengthIdx];
      auto& in = Input(i + 1);
      auto innerSize = in.size_from_dim(1);
      outDim = in.dims();
      outDim[0] = size;
      auto* out = Output(i);
      out->Resize(outDim);
      if (out->size() == 0) {
        continue;
      }
      void* src =
          (char*)in.raw_data() + offset * innerSize * in.meta().itemsize();
      void* dst = out->raw_mutable_data(in.meta());
      context_.template CopyItems<CPUContext, CPUContext>(
          in.meta(), out->size(), src, dst);
    }
    return true;
  }
  int batchSize_;
};

class ComputeOffsetOp : public Operator<CPUContext> {
 public:
  ComputeOffsetOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    CAFFE_ENFORCE(InputSize() == cursor->it.fields().size() + 1);
    auto* out = Output(0);
    std::vector<const TLength*> lengths;
    std::vector<TOffset> limits;
    std::vector<TOffset> sizes;
    std::vector<TOffset> offsets;
    sizes.resize(cursor->it.numOffsetFields());
    // gather length data
    lengths.resize(cursor->it.numLengthFields());
    for (int i = 0; i < lengths.size(); ++i) {
      lengths[i] = Input(cursor->it.lengthField(i).id + 1).data<int>();
    }
    // gather size limits
    limits.assign(sizes.size(), std::numeric_limits<TOffset>::max());
    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      int lengthFieldIdx = cursor->it.fields()[i].lengthFieldId + 1;
      limits[lengthFieldIdx] =
          std::min(limits[lengthFieldIdx], (TOffset)Input(i + 1).dims()[0]);
    }
    out->Resize(limits.at(0) + 1, sizes.size());
    auto* out_data = out->mutable_data<int64_t>();
    for (int k = 0; k <= limits.at(0); k++) {
      // advance cursor
      if (cursor->offsets.empty()) {
        cursor->offsets.assign(sizes.size(), 0);
      }
      // write output
      std::copy(cursor->offsets.begin(), cursor->offsets.end(), out_data);
      out_data += sizes.size();
      cursor->it.advance(lengths, cursor->offsets, sizes, limits, 1);
    }
    cursor->offsets.assign(sizes.size(), 0); // reset after getting meta info
    return true;
  }
};

class ReadRandomBatchOp : public Operator<CPUContext> {
 public:
  ReadRandomBatchOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        batchSize_(OperatorBase::GetSingleArgument<int>("batch_size", 1)) {}
  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    auto& idxblob = Input(1);
    auto& offsetsmat = Input(2);
    CAFFE_ENFORCE(InputSize() == cursor->it.fields().size() + 3);
    auto idxvec = idxblob.template data<int64_t>();
    auto& offsetdim = offsetsmat.dims();
    // gather data
    thread_local std::vector<TIndex> outDim;
    int64_t idx;
    {
      std::lock_guard<std::mutex> lock(cursor->mutex_);
      cursor->offsets.resize(1);
      idx = cursor->offsets.at(0);
      cursor->offsets.at(0) += batchSize_;
    }

    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1;
      auto& in = Input(i + 3);
      outDim = in.dims();
      outDim.at(0) = 0;
      auto idxbegin = idx;
      for (int j = 0; j < batchSize_; ++j) {
        if (idx >= idxblob.size()) {
          break;
        }
        CAFFE_ENFORCE(
            (idxvec[idx] + 1) * offsetdim[1] + lengthIdx < offsetsmat.size(),
            "Out of bound when trying to get elem from offsetsmat");
        auto offsetptr = offsetsmat.template data<TOffset>() +
            idxvec[idx] * offsetdim[1] + lengthIdx;
        auto offset = *offsetptr;
        auto size = *(offsetptr + offsetdim[1]) - offset;
        outDim.at(0) += size; // accumulate over the batch
        idx++;
      }
      idx = idxbegin; // reset
      auto* out = Output(i);
      out->Resize(outDim);
      if (out->size() == 0) {
        continue;
      }
      auto innerSize = in.size_from_dim(1);
      auto dst = static_cast<char*>(out->raw_mutable_data(in.meta()));
      int block_size = in.size() / in.dim(0);
      int block_bytesize = in.nbytes() / in.dim(0);
      int start = 0;
      for (int j = 0; j < batchSize_; ++j) {
        if (idx >= idxblob.size()) {
          break;
        }
        auto offsetptr = offsetsmat.template data<TOffset>() +
            idxvec[idx] * offsetdim[1] + lengthIdx;
        auto offset = *offsetptr;
        auto size = *(offsetptr + offsetdim[1]) - offset;
        // copy data
        void* src =
            (char*)in.raw_data() + offset * innerSize * in.meta().itemsize();
        context_.template CopyItems<CPUContext, CPUContext>(
            in.meta(), size * block_size, src, dst + start * block_bytesize);
        start += size;
        idx++;
      }
      idx = idxbegin; // reset
    }
    return true;
  }
  int batchSize_;
};

template <class Context>
class AppendOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  AppendOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& a = Input(0);
    auto& b = Input(1);
    auto* c = Output(0);
    CAFFE_ENFORCE(b.ndim() >= 1);
    if (a.size() == 0) {
      c->CopyFrom(b);
      return true;
    }
    CAFFE_ENFORCE(&a == c, "First argument must be in-place.");
    CAFFE_ENFORCE(b.ndim() == c->ndim());
    CAFFE_ENFORCE(a.meta() == b.meta());
    for (int i = 1; i < a.ndim(); ++i) {
      CAFFE_ENFORCE(a.dims()[i] == b.dims()[i]);
    }
    auto oldSize = c->size();
    c->Extend(b.dims()[0], kDatasetGrowthPct, &context_);
    auto* dst = (char*)c->raw_mutable_data() + oldSize * b.meta().itemsize();
    context_.template CopyItems<Context, Context>(
        b.meta(), b.size(), b.raw_data(), dst);
    return true;
  }
};

template <class Context>
class AtomicAppendOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  AtomicAppendOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0);
    const auto numFields = (InputSize() - 1) / 2;
    CAFFE_ENFORCE(OutputSize() == numFields);

    std::lock_guard<std::mutex> guard(*mutex);

    // 1: checks
    for (int i = 0; i < numFields; ++i) {
      auto& a = Input(1 + i);
      auto& b = Input(1 + i + numFields);
      auto* c = Output(i);
      CAFFE_ENFORCE(b.ndim() >= 1);
      if (a.size() == 0) {
        continue;
      }
      CAFFE_ENFORCE(
          (void*)&a == (void*)c, "Appended-to arguments must be in-place.");
      CAFFE_ENFORCE(b.ndim() == c->ndim());
      CAFFE_ENFORCE(a.meta() == b.meta());
      for (int j = 1; j < a.ndim(); ++j) {
        CAFFE_ENFORCE(a.dims()[j] == b.dims()[j]);
      }
    }

    // 2: copies
    for (int i = 0; i < numFields; ++i) {
      auto& a = Input(1 + i);
      auto& b = Input(1 + i + numFields);
      auto* c = Output(i);
      if (a.size() == 0) {
        c->CopyFrom(b);
        continue;
      }
      auto oldSize = c->size();
      c->Extend(b.dims()[0], kDatasetGrowthPct, &context_);
      auto* dst = (char*)c->raw_mutable_data() + oldSize * b.meta().itemsize();
      context_.template CopyItems<Context, Context>(
          b.meta(), b.size(), b.raw_data(), dst);
    }
    return true;
  }
};

REGISTER_CPU_OPERATOR(CreateTreeCursor, CreateTreeCursorOp);
REGISTER_CPU_OPERATOR(ResetCursor, ResetCursorOp);
REGISTER_CPU_OPERATOR(ReadNextBatch, ReadNextBatchOp);
REGISTER_CPU_OPERATOR(ComputeOffset, ComputeOffsetOp);
REGISTER_CPU_OPERATOR(ReadRandomBatch, ReadRandomBatchOp);
REGISTER_CPU_OPERATOR(CheckDatasetConsistency, CheckDatasetConsistencyOp);
REGISTER_CPU_OPERATOR(Append, AppendOp<CPUContext>);
REGISTER_CPU_OPERATOR(AtomicAppend, AtomicAppendOp<CPUContext>);

OPERATOR_SCHEMA(CreateTreeCursor)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Creates a cursor to iterate through a list of tensors, where some of those
tensors contain the lengths in a nested schema. The schema is determined by
the `fields` argument.

For example, to represent the following schema:

  Struct(
      a=Int(),
      b=List(List(Int)),
      c=List(
          Struct(
             c1=String,
             c2=List(Int),
          ),
      ),
  )

the field list will be:
  [
      "a",
      "b:lengths",
      "b:values:lengths",
      "b:values:values",
      "c:lengths",
      "c:c1",
      "c:c2:lengths",
      "c:c2:values",
  ]

And for the following instance of the struct:

  Struct(
      a=3,
      b=[[4, 5], [6, 7, 8], [], [9]],
      c=[
          Struct(c1='alex', c2=[10, 11]),
          Struct(c1='bob', c2=[12]),
      ],
  )

The values of the fields will be:
  {
      "a": [3],
      "b:lengths": [4],
      "b:values:lengths": [2, 3, 0, 1],
      "b:values:values": [4, 5, 6, 7, 8, 9],
      "c:lengths": [2],
      "c:c1": ["alex", "bob"],
      "c:c2:lengths": [2, 1],
      "c:c2:values": [10, 11, 12],
  }

In general, every field name in the format "{prefix}:lengths" defines a domain
"{prefix}", and every subsequent field in the format "{prefix}:{field}" will
be in that domain, and the length of the domain is provided for each entry of
the parent domain. In the example, "b:lengths" defines a domain of length 4, so
every field under domain "b" will have 4 entries.
The "lengths" field for a given domain must appear before any reference to
that domain.

Returns a pointer to an instance of the Cursor, which keeps the current offset
on each of the domains defined by `fields`. Cursor also ensures thread-safety
such that ReadNextBatch and ResetCursor can be used safely in parallel.

A cursor does not contain data per se, so calls to ReadNextBatch actually need
to pass a list of blobs containing the data to read for each one of the fields.
)DOC")
    .Output(0, "cursor", "A blob pointing to an instance of a new TreeCursor.")
    .Arg(
        "fields",
        "A list of strings each one representing a field of the dataset.");

OPERATOR_SCHEMA(ResetCursor)
    .NumInputs(1)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Resets the offsets for the given TreeCursor. This operation is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.");

OPERATOR_SCHEMA(ReadNextBatch)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Read the next batch of examples out of the given cursor and data blobs.

Input(0) is a blob pointing to a TreeCursor, and
[Input(1),... Input(num_fields)] a list of tensors containing the data for
each field of the dataset.

ReadNextBatch is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.")
    .Input(1, "dataset_field_0", "First dataset field")
    .Output(0, "field_0", "Tensor containing the next batch for field 0.")
    .Arg("batch_size", "Number of top-level entries to read.");

OPERATOR_SCHEMA(ComputeOffset)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Compute the offsets matrix given cursor and data blobs. Needs to be run at
the beginning or after resetting the cursor.

Input(0) is a blob pointing to a TreeCursor, and
[Input(1),... Input(num_fields)] a list of tensors containing the data for
each field of the dataset.

ComputeOffset is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.")
    .Input(1, "dataset_field_0", "First dataset field")
    .Output(0, "field_0", "Tensor containing offset info for this chunk.");

OPERATOR_SCHEMA(ReadRandomBatch)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Read the next batch of examples out of the given cursor,
idx blob, offset matrix and data blobs.

Input(0) is a blob pointing to a TreeCursor,
Input(1) is a blob pointing to the shuffled idx,
Input(2) is a blob pointing to the offset matrix and
[Input(3),... Input(num_fields)] a list of tensors containing the data for
each field of the dataset.

ReadRandomBatch is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.")
    .Input(1, "idx", "idx with a shuffled order.")
    .Input(2, "offsetsmat", "offset matrix containing length offset info.")
    .Input(3, "dataset_field_0", "First dataset field")
    .Output(0, "field_0", "Tensor containing the next batch for field 0.")
    .Arg("batch_size", "Number of top-level entries to read.");

OPERATOR_SCHEMA(CheckDatasetConsistency)
    .NumInputs(1, INT_MAX)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Checks that the given data fields represent a consistent dataset under
the schema specified by the `fields` argument. The operator fails if the
fields are not consistent. If data is consistent, each field's data can be
safely appended to an existing dataset, keeping it consistent.
)DOC")
    .Input(0, "field_0", "Data for field 0.")
    .Arg(
        "fields",
        "List of strings representing the string names in the format "
        "specified in the doc for CreateTreeCursor.");

OPERATOR_SCHEMA(Append)
    .NumInputs(2)
    .NumOutputs(1)
    .EnforceInplace({{0, 0}})
    .SetDoc(R"DOC(
Append input 2 to the end of input 1.
Input 1 must be the same as the output, that is, it is required to be in-place.
Input 1 may have to be re-allocated in order to accommodate the new size.
Currently, an exponential growth ratio is used in order to ensure amortized
constant time complexity.
All except the outer-most dimension must be the same between input 1 and 2.
)DOC")
    .Input(0, "dataset", "The tensor to be appended to.")
    .Input(1, "new_data", "Tensor to append to the end of dataset.")
    .Output(0, "dataset", "Same as input 0, representing the mutated tensor.");

OPERATOR_SCHEMA(AtomicAppend)
    .NumInputs(3, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .AllowInplace([](int in, int out) { return in == out + 1; });

SHOULD_NOT_DO_GRADIENT(CreateTreeCursor);
SHOULD_NOT_DO_GRADIENT(ResetCursor);
SHOULD_NOT_DO_GRADIENT(ReadNextBatch);
SHOULD_NOT_DO_GRADIENT(ComputeOffset);
SHOULD_NOT_DO_GRADIENT(ReadRandomBatch);
SHOULD_NOT_DO_GRADIENT(CheckDatasetConsistency);
SHOULD_NOT_DO_GRADIENT(Append);
SHOULD_NOT_DO_GRADIENT(AtomicAppend);
}  // namespace
}  // namespace caffe2

@@ -2,9 +2,9 @@
#define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_

#include "caffe2/core/context.h"
+#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"
-#include "caffe2/core/logging.h"

namespace caffe2 {

@@ -21,12 +21,26 @@ struct TypeForOutput<SameTypeAsInput, InputType> {
  using value = InputType;
};

-template <typename InputTypes, class Context, class Functor,
-          class OutputType = SameTypeAsInput>
-class UnaryElementwiseOp : public Operator<Context> {
+/**
+ * Generic meta-operator that is able to process element-wise operations on
+ * a single input tensor, returning a tensor of the same shape, and either of
+ * the same type as the input or of a specified result type.
+ *
+ * The functor provided must implement operator() as a template on input and
+ * output types, and on a Context. Moreover, it needs to provide a constructor
+ * that takes OperatorBase& as argument. This is in order to consume arguments
+ * passed to the operator instance.
+ */
+template <
+    typename InputTypes,
+    class Context,
+    class Functor,
+    class OutputType = SameTypeAsInput>
+class UnaryElementwiseWithArgsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
-  USE_SIMPLE_CTOR_DTOR(UnaryElementwiseOp);
+  UnaryElementwiseWithArgsOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws), functor(*this) {}

  bool RunOnDevice() override {
    return DispatchHelper<InputTypes>::call(this, Input(0));

@@ -38,11 +52,67 @@ class UnaryElementwiseOp : public Operator<Context> {
    auto* output = Output(0);
    output->ResizeLike(input);
    using R = typename TypeForOutput<OutputType, T>::value;
-    Functor()(input.size(), input.template data<T>(),
-              output->template mutable_data<R>(), &context_);
+    functor(
+        input.size(),
+        input.template data<T>(),
+        output->template mutable_data<R>(),
+        &context_);
    return true;
  }

+  Functor functor;
};
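
As a concrete instance of the functor contract just described, here is a hypothetical functor (ClampFunctor and its "max_value" argument are our own illustration, not part of the commit) that consumes an operator argument at construction time:

// Reads "max_value" once when the operator is created, then clamps
// every element against it.
struct ClampFunctor {
  explicit ClampFunctor(OperatorBase& op)
      : max_(op.GetSingleArgument<float>("max_value", 1.0f)) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* /*context*/) {
    for (int i = 0; i < n; ++i) {
      out[i] = in[i] > max_ ? max_ : in[i];
    }
  }

  float max_;
};

// Wired up, this would look like:
// REGISTER_CPU_OPERATOR(
//     Clamp,
//     UnaryElementwiseWithArgsOp<TensorTypes<float>, CPUContext, ClampFunctor>);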

/**
 * WithDefaultConstructor is a functor that can be used as the functor of an
 * UnaryElementwiseWithArgsOp. It simply forwards the operator() call into
 * another functor that doesn't accept arguments in its constructor.
 */
template <typename Functor>
struct WithDefaultConstructor {
  explicit WithDefaultConstructor(OperatorBase& op) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* c) {
    Functor()(n, in, out, c);
  }
};

/**
 * UnaryElementwiseOp is a wrapper around UnaryElementwiseWithArgsOp, with the
 * difference that it takes a functor with a default constructor, e.g. one
 * that does not need to take into consideration any arguments during operator
 * creation.
 */
template <
    typename InputTypes,
    class Context,
    class Functor,
    class OutputType = SameTypeAsInput>
using UnaryElementwiseOp = UnaryElementwiseWithArgsOp<
    InputTypes,
    Context,
    WithDefaultConstructor<Functor>,
    OutputType>;

/**
 * ForEach is a unary functor that forwards each element of the input array
 * into the elementwise Functor provided, and gathers the results of each
 * call into the resulting array. Use it as an adaptor if you want to create
 * a UnaryElementwiseOp that acts on each element of the tensor per function
 * call -- this is reasonable for complex types where vectorization wouldn't
 * be much of a gain, performance-wise.
 */
template <typename Functor>
struct ForEach {
  explicit ForEach(OperatorBase& op) : functor(op) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* c) {
    for (int i = 0; i < n; ++i) {
      out[i] = functor(in[i]);
    }
  }
  Functor functor;
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
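To make the functor contract above concrete, here is a minimal sketch of a stateful scalar functor plugged into ForEach; the ClampBelow name and its `min` argument are hypothetical and not part of this change:

    // Hypothetical scalar functor: reads a threshold from the operator's
    // arguments once, then clamps each element from below. ForEach adapts
    // its scalar operator() to the (n, in, out, context) signature that
    // UnaryElementwiseWithArgsOp invokes.
    struct ClampBelow {
      explicit ClampBelow(OperatorBase& op)
          : min_(op.GetSingleArgument<float>("min", 0.f)) {}
      float operator()(float x) {
        return x < min_ ? min_ : x;
      }
      float min_;
    };

    // Illustrative registration:
    // REGISTER_CPU_OPERATOR(
    //     ClampBelow,
    //     UnaryElementwiseWithArgsOp<
    //         TensorTypes<float>, CPUContext, ForEach<ClampBelow>>);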
@@ -192,8 +262,12 @@ class DivGradientOp final : public Operator<Context> {
   }                                                      \
 };                                                       \
 template <class DC>                                      \
-using name##Op = BinaryElementwiseOp<NumericTypes, DC,   \
-    name##Functor, SameTypeAsInput, true>
+using name##Op = BinaryElementwiseOp<                    \
+    NumericTypes,                                        \
+    DC,                                                  \
+    name##Functor,                                       \
+    SameTypeAsInput,                                     \
+    true>

 CAFFE2_BINARY_FUNCTOR_WRAPPER(Add);
 CAFFE2_BINARY_FUNCTOR_WRAPPER(Sub);

@@ -225,8 +299,8 @@ CAFFE2_BINARY_FUNCTOR_WRAPPER(Div);
   }                                                      \
 };                                                       \
 template <class DC>                                      \
-using name##Op = BinaryElementwiseOp<                    \
-    NumericTypes, DC, name##Functor, bool, true>
+using name##Op =                                         \
+    BinaryElementwiseOp<NumericTypes, DC, name##Functor, bool, true>

 CAFFE2_BINARY_FUNCTOR_BINARY_RESULT_WRAPPER(LT);
 CAFFE2_BINARY_FUNCTOR_BINARY_RESULT_WRAPPER(LE);
@@ -18,6 +18,7 @@ REGISTER_CPU_OPERATOR(UniformFill, UniformFillOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(UniformIntFill, UniformFillOp<int, CPUContext>);
 REGISTER_CPU_OPERATOR(ConstantFill, ConstantFillOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(ConstantIntFill, ConstantFillOp<int, CPUContext>);
+REGISTER_CPU_OPERATOR(ConstantBoolFill, ConstantFillOp<bool, CPUContext>);
 REGISTER_CPU_OPERATOR(GivenTensorFill, GivenTensorFillOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(GivenTensorIntFill, GivenTensorFillOp<int, CPUContext>);
 REGISTER_CPU_OPERATOR(GaussianFill, GaussianFillOp<float, CPUContext>);

@@ -30,6 +31,10 @@ OPERATOR_SCHEMA(UniformFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
 OPERATOR_SCHEMA(UniformIntFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
 OPERATOR_SCHEMA(ConstantFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
 OPERATOR_SCHEMA(ConstantIntFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
+OPERATOR_SCHEMA(ConstantBoolFill)
+    .NumInputs(0, 1)
+    .NumOutputs(1)
+    .AllowInplace({{0, 0}});
 OPERATOR_SCHEMA(GivenTensorFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
 OPERATOR_SCHEMA(GivenTensorIntFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
 OPERATOR_SCHEMA(GaussianFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
@@ -56,14 +56,12 @@ class FullyConnectedOp final : public Operator<Context> {
     CAFFE_ENFORCE(N == b.dim32(0), dimErrorString());
     CAFFE_ENFORCE(N == b.size(), dimErrorString());

-    // Create the Y shape (without allocation)
-    static thread_local vector<TIndex> Y_shape;
-    Y_shape = X.dims();
+    Y_shape_cache_ = X.dims();
     // This is an invariant of canonical_axis, so we can DCHECK.
-    DCHECK_LE(canonical_axis + 1, Y_shape.size());
-    Y_shape.resize(canonical_axis + 1);
-    Y_shape[canonical_axis] = N;
-    Y->Resize(Y_shape);
+    DCHECK_LE(canonical_axis + 1, Y_shape_cache_.size());
+    Y_shape_cache_.resize(canonical_axis + 1);
+    Y_shape_cache_[canonical_axis] = N;
+    Y->Resize(Y_shape_cache_);
     CAFFE_ENFORCE(M * N == Y->size(), dimErrorString());

     // W * x

@@ -88,6 +86,9 @@ class FullyConnectedOp final : public Operator<Context> {

  protected:
   size_t axis_{1};
+  // A local vector to cache the output shape so we don't need to recreate
+  // a vector object every time we run Run().
+  vector<TIndex> Y_shape_cache_;
   Tensor<Context> bias_multiplier_;
 };
@@ -7,7 +7,6 @@
 #include <limits>

 namespace caffe2 {

 namespace {
 using IndexKeyTypes = TensorTypes<int32_t, int64_t, std::string>;
 using TIndexValue = int64_t;

@@ -22,12 +21,17 @@ struct IndexBase {
   void Freeze() { frozen_ = true; }
   virtual ~IndexBase() {}
   const TypeMeta& Type() const { return meta_; }
+  TIndexValue Size() {
+    std::lock_guard<std::mutex> guard(dictMutex_);
+    return nextId_;
+  }

  protected:
   int64_t maxElements_;
   TypeMeta meta_;
   TIndexValue nextId_{1}; // guarded by dictMutex_
   std::atomic<bool> frozen_{false};
+  std::mutex dictMutex_;
 };

 template<typename T>

@@ -96,7 +100,6 @@ struct Index: IndexBase {
   }

   std::unordered_map<T, TIndexValue> dict_;
-  std::mutex dictMutex_;
 };

 template<class T>

@@ -142,7 +145,9 @@ class IndexGetOp: public Operator<CPUContext> {
 class IndexLoadOp: public Operator<CPUContext> {
  public:
   IndexLoadOp(const OperatorDef& operator_def, Workspace* ws)
-      : Operator(operator_def, ws) {}
+      : Operator(operator_def, ws),
+        skipFirstEntry_(
+            OperatorBase::GetSingleArgument<int>("skip_first_entry", 0)) {}

   bool RunOnDevice() override {
     return DispatchHelper<IndexKeyTypes>::call(this, Input(1));

@@ -153,8 +158,18 @@ class IndexLoadOp: public Operator<CPUContext> {
     auto* dict = dynamic_cast_if_rtti<Index<T>*>(base.get());
     CAFFE_ENFORCE(dict, "Wrong dictionary type given input keys.");
     const auto& keys = Input(1);
-    return dict->Load(keys.data<T>(), keys.size());
+    const auto* keys_data = keys.data<T>();
+    auto keys_size = keys.size();
+    if (skipFirstEntry_) {
+      CAFFE_ENFORCE(keys.size() > 0);
+      ++keys_data;
+      --keys_size;
+    }
+    return dict->Load(keys_data, keys_size);
   }

+ private:
+  bool skipFirstEntry_;
 };

 class IndexStoreOp: public Operator<CPUContext> {
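A minimal standalone model of the skip_first_entry logic above (illustrative only, not the operator itself): the leading placeholder entry is dropped before the keys reach Load().

    #include <cstddef>
    #include <utility>
    #include <vector>

    template <typename T>
    std::pair<const T*, size_t> maybe_skip_first(
        const std::vector<T>& keys, bool skip_first_entry) {
      const T* data = keys.data();
      size_t size = keys.size();
      if (skip_first_entry && size > 0) {
        ++data; // skip the entry reserved for the default 0 index
        --size;
      }
      return {data, size};
    }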
@@ -188,6 +203,19 @@ class IndexFreezeOp: public Operator<CPUContext> {
   }
 };

+class IndexSizeOp : public Operator<CPUContext> {
+ public:
+  IndexSizeOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    auto& base = OperatorBase::Input<std::unique_ptr<IndexBase>>(0);
+    auto* out = Output(0);
+    out->Resize(std::vector<TIndex>{});
+    *out->mutable_data<TIndexValue>() = base->Size();
+    return true;
+  }
+};
+
 REGISTER_CPU_OPERATOR(IntIndexCreate, IndexCreateOp<int32_t>);
 REGISTER_CPU_OPERATOR(LongIndexCreate, IndexCreateOp<int64_t>);

@@ -197,6 +225,7 @@ REGISTER_CPU_OPERATOR(IndexGet, IndexGetOp);
 REGISTER_CPU_OPERATOR(IndexLoad, IndexLoadOp);
 REGISTER_CPU_OPERATOR(IndexStore, IndexStoreOp);
 REGISTER_CPU_OPERATOR(IndexFreeze, IndexFreezeOp);
+REGISTER_CPU_OPERATOR(IndexSize, IndexSizeOp);

 OPERATOR_SCHEMA(IntIndexCreate)
     .NumInputs(0)

@@ -250,7 +279,6 @@ Should not be called concurrently with IndexGet.
 )DOC")
     .Input(0, "handle", "Pointer to an Index instance.");
-

 OPERATOR_SCHEMA(IndexLoad)
     .NumInputs(2)
     .NumOutputs(0)

@@ -259,7 +287,12 @@ Loads the index from the given 1-D tensor. Elements in the tensor will be given
 consecutive indexes starting at 1. Fails if tensor contains repeated elements.
 )DOC")
     .Input(0, "handle", "Pointer to an Index instance.")
-    .Input(1, "items", "1-D tensor with elements starting with index 1.");
+    .Input(1, "items", "1-D tensor with elements starting with index 1.")
+    .Arg(
+        "skip_first_entry",
+        "If set, skips the first entry of the tensor. This allows "
+        "loading tensors that are aligned with an embedding, where the first "
+        "entry corresponds to the default 0 index entry.");

 OPERATOR_SCHEMA(IndexStore)
     .NumInputs(1)

@@ -271,6 +304,15 @@ for unknowns, the first element of the output tensor will be element of index 1.
     .Input(0, "handle", "Pointer to an Index instance.")
     .Output(0, "items", "1-D tensor with elements starting with index 1.");

+OPERATOR_SCHEMA(IndexSize)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Returns the number of entries currently present in the index.
+)DOC")
+    .Input(0, "handle", "Pointer to an Index instance.")
+    .Output(0, "items", "Scalar int64 tensor with number of entries.");
+
 NO_GRADIENT(IndexGetOp);
 NO_GRADIENT(IntIndexCreate);
 NO_GRADIENT(LongIndexCreate);

@@ -278,5 +320,5 @@ NO_GRADIENT(StringIndexCreate);
 SHOULD_NOT_DO_GRADIENT(IndexFreeze);
 SHOULD_NOT_DO_GRADIENT(IndexLoad);
 SHOULD_NOT_DO_GRADIENT(IndexStore);
+SHOULD_NOT_DO_GRADIENT(IndexSize);
 } // namespace caffe2
@@ -3,6 +3,7 @@

 #include <cstdio>
 #include <map>
+#include <unordered_set>

 #include "caffe2/core/context.h"
 #include "caffe2/core/db.h"

@@ -65,12 +66,18 @@ class LoadOp final : public Operator<Context> {
     // chunks. This way we can make sure that all chunks were loaded in the end.
     // This is a map from output index to current size of the blob
     std::map<int, size_t> blobSizes;
+    std::unordered_set<string> loaded;
     for (; cursor->Valid(); cursor->Next()) {
       const string& key = cursor->key();
       if (!output_indices_.count(key)) {
         VLOG(1) << "Key " << key << " not used. Skipping.";
       } else {
+        CAFFE_ENFORCE(
+            loaded.count(key) == 0,
+            "Multiple copies of blob ",
+            key,
+            " found in the db.");
+
         VLOG(2) << "Deserializing blob " << key;
         BlobProto proto;
         CHECK(proto.ParseFromString(cursor->value()));

@@ -101,6 +108,15 @@ class LoadOp final : public Operator<Context> {
           blobSize.first->second = blob->Get<Tensor<Context>>().size();
         }
       }
+
+        if (!proto.has_tensor() ||
+            blobSize.first->second >= blob->Get<Tensor<Context>>().size()) {
+          loaded.insert(key);
+        }
+
+        if (loaded.size() >= OutputSize()) {
+          break;
+        }
       }
     }

@@ -116,6 +132,8 @@ class LoadOp final : public Operator<Context> {
             blobSize.second);
       }
     }
+
+    CHECK_EQ(loaded.size(), OutputSize());
   }

  private:
caffe2/operators/one_hot_ops.cc (new file, 107 lines)
@@ -0,0 +1,107 @@
+#include "caffe2/core/operator.h"
+#include "caffe2/core/tensor.h"
+
+namespace caffe2 {
+namespace {
+
+class OneHotOp : public Operator<CPUContext> {
+ public:
+  OneHotOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    auto& indices = Input(0);
+    auto& index_size_tensor = Input(1);
+    CAFFE_ENFORCE(indices.ndim() == 1);
+    CAFFE_ENFORCE(index_size_tensor.size() == 1);
+    auto batch_size = indices.size();
+    auto index_size = *index_size_tensor.data<int64_t>();
+
+    auto* indices_ptr = indices.data<int64_t>();
+    auto* one_hots = Output(0);
+    one_hots->Resize(std::vector<TIndex>{batch_size, index_size});
+    if (one_hots->size() == 0) {
+      return true;
+    }
+    auto* one_hots_ptr = one_hots->mutable_data<float>();
+    memset(one_hots_ptr, 0, one_hots->nbytes());
+    for (int i = 0; i < batch_size; ++i) {
+      auto label_idx = indices_ptr[i];
+      DCHECK((0 <= label_idx) && (label_idx < index_size));
+      one_hots_ptr[label_idx] = 1.0;
+      one_hots_ptr += index_size;
+    }
+    return true;
+  }
+};
+
+class SegmentOneHotOp : public Operator<CPUContext> {
+ public:
+  SegmentOneHotOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    auto& lengths = Input(0);
+    auto& indices = Input(1);
+    auto& index_size_tensor = Input(2);
+    CAFFE_ENFORCE(lengths.ndim() == 1);
+    CAFFE_ENFORCE(indices.ndim() == 1);
+    CAFFE_ENFORCE(index_size_tensor.size() == 1);
+    auto batch_size = lengths.size();
+    auto index_size = *index_size_tensor.data<int64_t>();
+    CAFFE_ENFORCE(index_size > 0);
+
+    auto* lengths_ptr = lengths.data<int32_t>();
+    auto* indices_ptr = indices.data<int64_t>();
+    auto* one_hots = Output(0);
+    one_hots->Resize(std::vector<TIndex>{batch_size, index_size});
+    auto* one_hots_ptr = one_hots->mutable_data<float>();
+    if (one_hots->size() == 0) {
+      return true;
+    }
+    memset(one_hots_ptr, 0, one_hots->nbytes());
+    int el_idx = 0;
+    for (int i = 0; i < batch_size; ++i) {
+      for (int j = 0; j < lengths_ptr[i]; ++j) {
+        DCHECK(el_idx < indices.size());
+        auto label_idx = indices_ptr[el_idx++];
+        DCHECK((0 <= label_idx) && (label_idx < index_size));
+        one_hots_ptr[label_idx] = 1.0;
+      }
+      one_hots_ptr += index_size;
+    }
+    return true;
+  }
+};
+
+REGISTER_CPU_OPERATOR(OneHot, OneHotOp);
+REGISTER_CPU_OPERATOR(SegmentOneHot, SegmentOneHotOp);
+
+OPERATOR_SCHEMA(OneHot)
+    .NumInputs(2)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Given a sequence of indices, one for each example in a batch, returns a matrix
+where each inner dimension has the size of the index and has 1.0 in the index
+active in the given example, and 0.0 everywhere else.
+)DOC")
+    .Input(0, "indices", "The active index for each example in the batch.")
+    .Input(1, "index_size_tensor", "Scalar with the size of the index.")
+    .Output(0, "one_hots", "Matrix of size len(indices) x index_size");
+
+OPERATOR_SCHEMA(SegmentOneHot)
+    .NumInputs(3)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Given a sequence of indices, segmented by the lengths tensor, returns a matrix
+that has the elements in each sequence set to 1.0, and 0.0 everywhere else.
+)DOC")
+    .Input(0, "lengths", "Size of each segment.")
+    .Input(1, "indices", "Active indices, of size sum(lengths)")
+    .Input(2, "index_size_tensor", "Size of the index")
+    .Output(0, "one_hots", "Matrix of size len(lengths) x index_size");
+
+NO_GRADIENT(OneHot);
+NO_GRADIENT(SegmentOneHot);
+}
+}
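For reference, a standalone sketch of the OneHot inner loop above (types simplified; illustrative, not the operator itself): with indices = {2, 0} and index_size = 3 it produces the rows {0,0,1} and {1,0,0}.

    #include <vector>

    std::vector<float> one_hot(const std::vector<long>& indices,
                               long index_size) {
      std::vector<float> out(indices.size() * index_size, 0.f);
      float* row = out.data();
      for (long idx : indices) {
        row[idx] = 1.f;    // mark the active column of this row
        row += index_size; // advance to the next row
      }
      return out;
    }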
@@ -124,6 +124,67 @@ struct LogSumExpRangeReducerDef {
       "input slices. Operation doesn't change the shape of individual blocks.";
 };

+template <typename T, class Context>
+class LogMeanExpRangeReducer;
+template <typename T, class Context>
+class LogMeanExpRangeReducerGradient;
+
+template <typename T>
+class LogMeanExpRangeReducer<T, CPUContext> {
+ public:
+  void operator()(
+      const TIndex block_size,
+      const TIndex blocks,
+      const T* in,
+      T* out,
+      CPUContext* context) {
+    for (int j = 0; j < block_size; ++j) {
+      T max_value = std::numeric_limits<T>::lowest();
+      for (int i = 0; i < blocks; ++i) {
+        max_value = std::max(max_value, in[i * block_size + j]);
+      }
+      T scaled_exp_sum = 0;
+      for (int i = 0; i < blocks; ++i) {
+        scaled_exp_sum += std::exp(in[i * block_size + j] - max_value);
+      }
+      scaled_exp_sum /= blocks;
+      *(out++) = std::log(scaled_exp_sum) + max_value;
+    }
+  }
+};
+
+template <typename T, class Context>
+class LogMeanExpRangeReducerGradient {
+ public:
+  void operator()(
+      const TIndex block_size,
+      const TIndex blocks,
+      const T* segment_grad, // GO
+      T* data_grad, // GI
+      const T* data_in, // I
+      const T* data_out, // O
+      Context* context) {
+    for (int j = 0; j < block_size; ++j) {
+      const T out_grad = *(segment_grad++);
+      const T offset = *(data_out++);
+      for (int i = 0; i < blocks; ++i) {
+        auto idx = i * block_size + j;
+        data_grad[idx] = out_grad * std::exp(data_in[idx] - offset) / blocks;
+      }
+    }
+  }
+};
+
+struct LogMeanExpRangeReducerDef {
+  template <typename T, class Context>
+  using Reducer = LogMeanExpRangeReducer<T, Context>;
+  template <typename T, class Context>
+  using ReducerGradient = LogMeanExpRangeReducerGradient<T, Context>;
+  static constexpr const char* name = "LogMeanExp";
+  static constexpr const char* doc =
+      "LogMeanExp computes the element-wise log of the mean of exponentials "
+      "of input slices. Operation doesn't change the shape of individual "
+      "blocks.";
+};
+
 template <typename T, class Context>
 class MeanRangeReducer;

@@ -180,10 +241,78 @@ struct MeanRangeReducerDef {
   static constexpr const char* doc =
       "Mean computation is done element-wise, so that each element of the "
       "output slice corresponds to the average value of the respective "
-      "elements in the input slives. Operation doesn't change the shape of "
+      "elements in the input slices. Operation doesn't change the shape of "
       "individual blocks.";
 };

+template <typename T, class Context>
+class MaxRangeReducer;
+template <typename T, class Context>
+class MaxRangeReducerGradient;
+
+template <typename T>
+class MaxRangeReducer<T, CPUContext> {
+ public:
+  void operator()(
+      const TIndex block_size,
+      const TIndex blocks,
+      const T* in,
+      T* out,
+      CPUContext* context) {
+    for (int j = 0; j < block_size; ++j) {
+      T max_value = std::numeric_limits<T>::lowest();
+      for (int i = 0; i < blocks; ++i) {
+        max_value = std::max(max_value, in[i * block_size + j]);
+      }
+      *(out++) = max_value;
+    }
+  }
+};
+
+template <typename T, class Context>
+class MaxRangeReducerGradient {
+ public:
+  void operator()(
+      const TIndex block_size,
+      const TIndex blocks,
+      const T* segment_grad, // GO
+      T* data_grad, // GI
+      const T* data_in, // I
+      const T* data_out, // O
+      Context* context) {
+    std::memset(
+        static_cast<void*>(data_grad), 0, blocks * block_size * sizeof(T));
+    for (int j = 0; j < block_size; ++j) {
+      const T out_grad = *(segment_grad++);
+      const T out = data_out[j];
+      for (int i = 0; i < blocks; ++i) {
+        auto idx = i * block_size + j;
+        if (out == data_in[idx]) {
+          data_grad[idx] = out_grad;
+          break;
+        }
+      }
+    }
+  }
+};
+
+struct MaxRangeReducerDef {
+  template <typename T, class Context>
+  using Reducer = MaxRangeReducer<T, Context>;
+  template <typename T, class Context>
+  using ReducerGradient = MaxRangeReducerGradient<T, Context>;
+  static constexpr const char* name = "Max";
+  static constexpr const char* doc =
+      "Max computation is done element-wise, so that each element of the "
+      "output slice corresponds to the max value of the respective "
+      "elements in the input slices. Operation doesn't change the shape of "
+      "individual blocks. This implementation imitates torch nn.Max operator. "
+      "If the maximum value occurs more than once, the operator will return "
+      "the first occurrence of value. When computing the gradient using the "
+      "backward propagation, the gradient input corresponding to the first "
+      "occurrence of the maximum value will be used.";
+};
+
 ////////////////////////////////////////////////////////////////////////////////
 // Incremental reducers: consume elements one by one
 ////////////////////////////////////////////////////////////////////////////////
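For reference, the reduction implemented by LogMeanExpRangeReducer above is the numerically stabilized log-mean-exp; with m = max_i x_i:

    \mathrm{LogMeanExp}(x_1,\dots,x_n)
        = \log\Big(\frac{1}{n}\sum_{i=1}^{n} e^{x_i}\Big)
        = m + \log\Big(\frac{1}{n}\sum_{i=1}^{n} e^{x_i - m}\Big),
    \qquad
    \frac{\partial}{\partial x_i}\,\mathrm{LogMeanExp}(x)
        = \frac{e^{\,x_i - \mathrm{LogMeanExp}(x)}}{n}.

Subtracting m before exponentiating avoids overflow; the gradient functor multiplies the second expression by the incoming segment gradient, which is exactly the line data_grad[idx] = out_grad * std::exp(data_in[idx] - offset) / blocks.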
@@ -32,12 +32,16 @@ class CuDNNReluOp final : public Operator<CUDAContext> {
     if (X.dims() != cudnn_input_dims_) {
       VLOG(1) << "Setting descriptors.";
       cudnn_input_dims_ = X.dims();
-      int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(3));
-      int H = 1;
-      int W = 1;
+      int C = 1, H = 1, W = 1;
       if (X.ndim() == 4) {
+        // Normal 4-dimensional tensors for images.
+        C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(3));
         H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
         W = (order_ == StorageOrder::NCHW ? X.dim32(3) : X.dim32(2));
+      } else {
+        // If X is not 4-dimensional, we will simply use H = 1 and W = 1
+        // and wrap everything into C.
+        C = X.size() / X.dim32(0);
       }
       CUDNN_CHECK(cudnnSetTensor4dDescriptor(
           data_desc_, GetCudnnTensorFormat(order_),

@@ -93,12 +97,16 @@ class CuDNNReluGradientOp final : public Operator<CUDAContext> {
     if (Y.dims() != cudnn_input_dims_) {
       VLOG(1) << "Setting descriptors.";
       cudnn_input_dims_ = Y.dims();
-      int C = (order_ == StorageOrder::NCHW ? Y.dim32(1) : Y.dim32(3));
-      int H = 1;
-      int W = 1;
+      int C = 1, H = 1, W = 1;
       if (Y.ndim() == 4) {
+        // Normal 4-dimensional tensors for images.
+        C = (order_ == StorageOrder::NCHW ? Y.dim32(1) : Y.dim32(3));
         H = (order_ == StorageOrder::NCHW ? Y.dim32(2) : Y.dim32(1));
         W = (order_ == StorageOrder::NCHW ? Y.dim32(3) : Y.dim32(2));
+      } else {
+        // If Y is not 4-dimensional, we will simply use H = 1 and W = 1
+        // and wrap everything into C.
+        C = Y.size() / Y.dim32(0);
       }
       CUDNN_CHECK(cudnnSetTensor4dDescriptor(
           data_desc_, GetCudnnTensorFormat(order_),
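A small sketch of the shape folding used above (resting on the assumption, stated in the diff's comments, that ReLU is pointwise so the descriptor layout does not change the result): a non-4D tensor of shape (N, d1, ..., dk) is presented to cuDNN as N x C x 1 x 1 with C = d1 * ... * dk.

    #include <cstddef>
    #include <vector>

    int folded_channels(const std::vector<int>& dims) {
      int c = 1;
      for (size_t i = 1; i < dims.size(); ++i) {
        c *= dims[i]; // equivalent to X.size() / X.dim32(0)
      }
      return c;
    }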
caffe2/operators/reverse_packed_segs_op.cc (new file, 35 lines)
@@ -0,0 +1,35 @@
+#include "caffe2/operators/reverse_packed_segs_op.h"
+
+namespace caffe2 {
+namespace {
+REGISTER_CPU_OPERATOR(ReversePackedSegs, ReversePackedSegsOp<CPUContext>);
+
+OPERATOR_SCHEMA(ReversePackedSegs)
+    .NumInputs(2)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Reverse segments in a 3-D tensor (lengths, segments, embeddings), leaving
+paddings unchanged. This operator is used to reverse input of a recurrent neural
+network to make it a BRNN.
+)DOC")
+    .Input(0, "data", "a 3-D (lengths, segments, embeddings) tensor.")
+    .Input(1, "lengths", "length of each segment.")
+    .Output(
+        0,
+        "reversed data",
+        "a (lengths, segments, embeddings) tensor with each segment reversed "
+        "and paddings unchanged.");
+
+class GetReversePackedSegsGradient : public GradientMakerBase {
+  using GradientMakerBase::GradientMakerBase;
+  vector<OperatorDef> GetGradientDefs() override {
+    return SingleGradientDef(
+        "ReversePackedSegs",
+        "",
+        vector<string>{GO(0), I(1)},
+        vector<string>{GI(0)});
+  }
+};
+REGISTER_GRADIENT(ReversePackedSegs, GetReversePackedSegsGradient);
+} // namespace
+} // namespace caffe2
caffe2/operators/reverse_packed_segs_op.h (new file, 84 lines)
@@ -0,0 +1,84 @@
+#ifndef CAFFE2_OPERATORS_REVERSE_PACKED_SEGS_OP_H_
+#define CAFFE2_OPERATORS_REVERSE_PACKED_SEGS_OP_H_
+
+#include "caffe2/core/context.h"
+#include "caffe2/core/operator.h"
+
+namespace caffe2 {
+
+template <class Context>
+class ReversePackedSegsOp final : public Operator<Context> {
+ public:
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+  USE_SIMPLE_CTOR_DTOR(ReversePackedSegsOp);
+  USE_DISPATCH_HELPER;
+
+  bool RunOnDevice() override {
+    return DispatchHelper<TensorTypes<float, double, int, long, bool>>::call(
+        this, Input(DATA));
+  }
+
+  template <typename T>
+  bool DoRunWithType() {
+    if (Input(LENGTHS).template IsType<int>()) {
+      DoRunWithLengthType<T, int>();
+    } else {
+      DoRunWithLengthType<T, long>();
+    }
+    return true;
+  }
+
+ private:
+  INPUT_TAGS(DATA, LENGTHS);
+
+  template <typename T, typename LengthType>
+  void DoRunWithLengthType() {
+    const auto& data = Input(DATA);
+    const auto& lengths = Input(LENGTHS);
+
+    CAFFE_ENFORCE(
+        data.ndim() == 3,
+        "DATA should be 3-D tensor <lengths, "
+        "segments, embeddings>");
+    CAFFE_ENFORCE(lengths.ndim() == 1, "LENGTHS should be 1-D");
+
+    auto* output = Output(0);
+    const auto& shape = data.dims();
+    output->Resize(shape);
+
+    const auto& max_length = data.dims()[0];
+    const auto& batch_size = data.dims()[1];
+    const auto& block_size = data.dims()[2];
+    CAFFE_ENFORCE(
+        lengths.dims()[0] == batch_size,
+        "lengths size should be"
+        " equal to batch size");
+
+    const T* data_ptr = data.template data<T>();
+    const LengthType* lengths_ptr = lengths.template data<LengthType>();
+    T* rev_data_ptr = output->template mutable_data<T>();
+    for (TIndex i = 0; i < batch_size; i++) {
+      const auto& seg_length = lengths_ptr[i];
+      CHECK_LE(seg_length, max_length);
+      TIndex j = 0;
+      for (; j < seg_length; j++) {
+        const T* data_block_ptr = data_ptr + (j * batch_size + i) * block_size;
+        T* rev_data_block_ptr =
+            rev_data_ptr + ((seg_length - 1 - j) * batch_size + i) * block_size;
+        context_.template Copy<T, Context, Context>(
+            block_size, data_block_ptr, rev_data_block_ptr);
+      }
+      for (; j < max_length; j++) {
+        const T* data_block_ptr = data_ptr + (j * batch_size + i) * block_size;
+        T* rev_data_block_ptr =
+            rev_data_ptr + (j * batch_size + i) * block_size;
+        context_.template Copy<T, Context, Context>(
+            block_size, data_block_ptr, rev_data_block_ptr);
+      }
+    }
+  }
+};
+
+} // namespace caffe2
+
+#endif // CAFFE2_OPERATORS_REVERSE_PACKED_SEGS_OP_H_
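A standalone sketch of the indexing above (illustrative, float-only): the data is packed time-major with shape (max_length, batch_size, block_size), so element (t, b) starts at offset (t * batch_size + b) * block_size; the first lengths[b] steps of each sequence are mirrored and the padding tail is copied through unchanged.

    #include <algorithm>

    void reverse_packed(const float* in, float* out, const int* lengths,
                        int max_length, int batch_size, int block_size) {
      for (int b = 0; b < batch_size; ++b) {
        for (int t = 0; t < max_length; ++t) {
          int src = (t * batch_size + b) * block_size;
          int dst_t = (t < lengths[b]) ? lengths[b] - 1 - t : t;
          int dst = (dst_t * batch_size + b) * block_size;
          std::copy(in + src, in + src + block_size, out + dst);
        }
      }
    }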
@@ -1066,9 +1066,15 @@ REGISTER_SEGMENT_DEF(
 REGISTER_SEGMENT_DEF(
     AbstractSortedSegmentRangeDef<float, int, CPUContext,
                                   LogSumExpRangeReducerDef>);
+REGISTER_SEGMENT_DEF(AbstractSortedSegmentRangeDef<
+                     float,
+                     int,
+                     CPUContext,
+                     LogMeanExpRangeReducerDef>);
 REGISTER_SEGMENT_DEF(
-    AbstractSortedSegmentRangeDef<float, int, CPUContext,
-                                  MeanRangeReducerDef>);
+    AbstractSortedSegmentRangeDef<float, int, CPUContext, MeanRangeReducerDef>);
+REGISTER_SEGMENT_DEF(
+    AbstractSortedSegmentRangeDef<float, int, CPUContext, MaxRangeReducerDef>);

 #define REGISTER_REDUCER_WITH_ALL_OPS(reducer_def) \
   REGISTER_SEGMENT_DEF( \
caffe2/operators/sparse_to_dense_mask_op.cc (new file, 152 lines)
@@ -0,0 +1,152 @@
+#include <algorithm>
+#include <unordered_map>
+#include <vector>
+#include "caffe2/core/context.h"
+#include "caffe2/core/operator.h"
+#include "caffe2/core/tensor.h"
+
+namespace caffe2 {
+
+using ValueTypes = TensorTypes<int32_t, int64_t, float, double, string, bool>;
+
+class SparseToDenseMaskOp : public Operator<CPUContext> {
+ public:
+  SparseToDenseMaskOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<CPUContext>(operator_def, ws) {
+    std::vector<int> mask = GetRepeatedArgument<int>("mask");
+    featuresCount_ = mask.size();
+    auto biggest = *std::max_element(mask.begin(), mask.end());
+    dense_.assign(std::min(kMaxDenseSize, biggest + 1), -1);
+    for (int i = 0; i < mask.size(); i++) {
+      int id = mask[i];
+      CAFFE_ENFORCE(id >= 0, "Only non-negative IDs are allowed.");
+      if (id >= kMaxDenseSize) {
+        sparse_[id] = i;
+      } else {
+        dense_[id] = i;
+      }
+    }
+  }
+
+  bool RunOnDevice() override {
+    const TypeMeta& meta = Input(INDICES).meta();
+    if (meta.Match<int32_t>()) {
+      return DoRunWithIndexType<int32_t>();
+    } else if (meta.Match<int64_t>()) {
+      return DoRunWithIndexType<int64_t>();
+    } else {
+      CAFFE_THROW("Unsupported type of tensor: ", meta.name());
+      return false;
+    }
+  }
+
+  template <typename TInd>
+  bool DoRunWithIndexType() {
+    if (InputSize() < 4) {
+      return DoRunWithLengthType<TInd, int32_t>();
+    } else {
+      const TypeMeta& meta = Input(LENGTHS).meta();
+      if (meta.Match<int32_t>()) {
+        return DoRunWithLengthType<TInd, int32_t>();
+      } else if (meta.Match<int64_t>()) {
+        return DoRunWithLengthType<TInd, int64_t>();
+      } else {
+        CAFFE_THROW("Unsupported type of tensor: ", meta.name());
+        return false;
+      }
+    }
+  }
+
+  template <typename TInd, typename TLen>
+  bool DoRunWithLengthType() {
+    return DispatchHelper<ValueTypes, TInd, TLen>::call(this, Input(VALUES));
+  }
+
+  template <typename TInd, typename TLen, typename TVal>
+  bool DoRunWithType() {
+    auto& sparse_indices = Input(INDICES);
+    CAFFE_ENFORCE(sparse_indices.ndim() == 1);
+    auto& sparse_values = Input(VALUES);
+    CAFFE_ENFORCE(sparse_values.ndim() == 1);
+    CAFFE_ENFORCE(sparse_indices.size() == sparse_values.size());
+    auto& default_value = Input(DEFAULT);
+    CAFFE_ENFORCE(default_value.size() == 1);
+
+    const TInd* sparse_indices_vec = sparse_indices.data<TInd>();
+    const TVal* sparse_values_vec = sparse_values.template data<TVal>();
+    const TVal* default_val = default_value.template data<TVal>();
+
+    int cols = featuresCount_;
+    int rows = 0;
+    TLen default_length = sparse_indices.dim32(0);
+    const TLen* lengths_vec = nullptr;
+    auto* output = Output(0);
+    if (InputSize() == 4) {
+      auto& lengths = Input(LENGTHS);
+      CAFFE_ENFORCE(lengths.ndim() == 1);
+      lengths_vec = lengths.data<TLen>();
+      rows = lengths.dim32(0);
+      output->Resize(rows, cols);
+    }
+    if (rows == 0) {
+      // if the LENGTHS is not set or it is empty, the output will be a vector
+      rows = 1;
+      lengths_vec = &default_length;
+      output->Resize(cols);
+    }
+
+    // init
+    TVal* output_data = output->template mutable_data<TVal>();
+    for (int i = 0; i < cols * rows; i++) {
+      output_data[i] = default_val[0];
+    }
+
+    TLen offset = 0;
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < lengths_vec[r]; c++) {
+        int idx = getFeatureIdx(sparse_indices_vec[offset + c]);
+        if (idx != -1) {
+          output_data[r * cols + idx] = sparse_values_vec[offset + c];
+        }
+      }
+      offset += lengths_vec[r];
+    }
+
+    return true;
+  }
+
+ private:
+  const int kMaxDenseSize = 1024 * 128;
+
+  std::unordered_map<int, int> sparse_;
+  std::vector<int> dense_;
+  int featuresCount_;
+
+  inline int getFeatureIdx(int id) const {
+    if (id >= kMaxDenseSize) {
+      const auto& iter = sparse_.find(id);
+      if (iter == sparse_.end()) {
+        return -1;
+      } else {
+        return iter->second;
+      }
+    } else {
+      return (id >= dense_.size()) ? -1 : dense_[id];
+    }
+  }
+
+  INPUT_TAGS(INDICES, VALUES, DEFAULT, LENGTHS);
+};
+
+namespace {
+REGISTER_CPU_OPERATOR(SparseToDenseMask, SparseToDenseMaskOp);
+
+OPERATOR_SCHEMA(SparseToDenseMask)
+    .NumInputs(3, 4)
+    .NumOutputs(1)
+    .SetDoc("Convert sparse representations to dense with given indices.")
+    .Output(0, "output", "1-D or 2-D dense tensor.");
+
+NO_GRADIENT(SparseToDenseMask);
+} // namespace
+} // namespace caffe2
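The mask lookup above amounts to the following (illustrative sketch of the dense path only, not the operator): a feature id's position in the `mask` argument determines its output column, and unknown ids are dropped so the default value stays in place.

    #include <vector>

    int feature_column(const std::vector<int>& mask, int id) {
      for (int i = 0; i < static_cast<int>(mask.size()); ++i) {
        if (mask[i] == id) {
          return i; // the operator precomputes this as dense_[id] = i
        }
      }
      return -1; // unknown id: the default value remains in the output
    }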
caffe2/operators/string_ops.cc (new file, 124 lines)
@@ -0,0 +1,124 @@
+#include "caffe2/operators/string_ops.h"
+#include "caffe2/core/operator.h"
+
+namespace caffe2 {
+namespace {
+
+struct StartsWith {
+  explicit StartsWith(OperatorBase& op)
+      : prefix_(op.GetSingleArgument<std::string>("prefix", "")) {}
+  bool operator()(const std::string& str) {
+    return std::mismatch(prefix_.begin(), prefix_.end(), str.begin()).first ==
+        prefix_.end();
+  }
+
+ private:
+  std::string prefix_;
+};
+
+struct EndsWith {
+  explicit EndsWith(OperatorBase& op)
+      : suffix_(op.GetSingleArgument<std::string>("suffix", "")) {}
+  bool operator()(const std::string& str) {
+    return std::mismatch(suffix_.rbegin(), suffix_.rend(), str.rbegin())
+               .first == suffix_.rend();
+  }
+
+ private:
+  std::string suffix_;
+};
+
+struct Prefix {
+  explicit Prefix(OperatorBase& op)
+      : length_(op.GetSingleArgument<int>("length", 3)) {}
+  std::string operator()(const std::string& str) {
+    return std::string(str.begin(), std::min(str.end(), str.begin() + length_));
+  }
+
+ private:
+  int length_;
+};
+
+struct Suffix {
+  explicit Suffix(OperatorBase& op)
+      : length_(op.GetSingleArgument<int>("length", 3)) {}
+  std::string operator()(const std::string& str) {
+    return std::string(std::max(str.begin(), str.end() - length_), str.end());
+  }
+
+ private:
+  int length_;
+};
+
+template <typename ScalarFunctor, typename OutputType = std::string>
+using StringElementwiseOp = UnaryElementwiseWithArgsOp<
+    TensorTypes<std::string>,
+    CPUContext,
+    ForEach<ScalarFunctor>,
+    OutputType>;
+
+REGISTER_CPU_OPERATOR(StringPrefix, StringElementwiseOp<Prefix>);
+REGISTER_CPU_OPERATOR(StringSuffix, StringElementwiseOp<Suffix>);
+REGISTER_CPU_OPERATOR(StringStartsWith, StringElementwiseOp<StartsWith, bool>);
+REGISTER_CPU_OPERATOR(StringEndsWith, StringElementwiseOp<EndsWith, bool>);
+
+OPERATOR_SCHEMA(StringPrefix)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Computes the element-wise string prefix of the string tensor.
+Input strings that are shorter than prefix length will be returned unchanged.
+NOTE: Prefix is computed on number of bytes, which may lead to wrong behavior
+and potentially invalid strings for variable-length encodings such as utf-8.
+)DOC")
+    .Arg("length", "Maximum size of the prefix, in bytes.")
+    .Input(0, "strings", "Tensor of std::string.")
+    .Output(
+        0,
+        "prefixes",
+        "Tensor of std::string containing prefixes for each input.");
+
+OPERATOR_SCHEMA(StringSuffix)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Computes the element-wise string suffix of the string tensor.
+Input strings that are shorter than suffix length will be returned unchanged.
+NOTE: Suffix is computed on number of bytes, which may lead to wrong behavior
+and potentially invalid strings for variable-length encodings such as utf-8.
+)DOC")
+    .Input(0, "strings", "Tensor of std::string.")
+    .Output(
+        0,
+        "suffixes",
+        "Tensor of std::string containing suffixes for each input.")
+    .Arg("length", "Maximum size of the suffix, in bytes.");
+
+OPERATOR_SCHEMA(StringStartsWith)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Performs the starts-with check on each string in the input tensor.
+Returns a tensor of bools of the same dimension as the input.
+)DOC")
+    .Arg("prefix", "The prefix to check input strings against.")
+    .Input(0, "strings", "Tensor of std::string.")
+    .Output(0, "bools", "Tensor of bools of same shape as input.");
+
+OPERATOR_SCHEMA(StringEndsWith)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .SetDoc(R"DOC(
+Performs the ends-with check on each string in the input tensor.
+Returns a tensor of bools of the same dimension as the input.
+)DOC")
+    .Arg("suffix", "The suffix to check input strings against.")
+    .Input(0, "strings", "Tensor of std::string.")
+    .Output(0, "bools", "Tensor of bools of same shape as input.");
+
+SHOULD_NOT_DO_GRADIENT(StringPrefix);
+SHOULD_NOT_DO_GRADIENT(StringSuffix);
+SHOULD_NOT_DO_GRADIENT(StringStartsWith);
+SHOULD_NOT_DO_GRADIENT(StringEndsWith);
+}
+} // namespace caffe2
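The starts-with test above uses the three-iterator std::mismatch idiom: walk the prefix and the string together and check that the whole prefix was consumed. A guarded standalone variant (the length check is an addition here; the code above leaves it to its callers):

    #include <algorithm>
    #include <string>

    bool starts_with(const std::string& str, const std::string& prefix) {
      return str.size() >= prefix.size() &&
          std::mismatch(prefix.begin(), prefix.end(), str.begin()).first ==
          prefix.end();
    }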
caffe2/operators/string_ops.h (new file, 13 lines)
@@ -0,0 +1,13 @@
+#pragma once
+#include "caffe2/core/operator.h"
+#include "caffe2/operators/elementwise_op.h"
+
+namespace caffe2 {
+
+template <typename ScalarFunctor, typename OutputType = std::string>
+using StringElementwiseOp = UnaryElementwiseWithArgsOp<
+    TensorTypes<std::string>,
+    CPUContext,
+    ForEach<ScalarFunctor>,
+    OutputType>;
+}
@@ -10,24 +10,25 @@ namespace caffe2 {
 #define COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS 5

 namespace {
-// TODO(jiayq): one possible optimization is to copy the buffer into a shared memory
-// location to speed up access.
+// TODO(jiayq): one possible optimization is to copy the buffer into a shared
+// memory location to speed up access.
 template <typename Dtype>
 __global__ void transpose_gpu(const int nthreads, const Dtype* from_data,
     Dtype* to_data, const int* buffer, const int num_axes) {
   int from_inds[COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS];
   const int* from_counts = buffer;
   const int* to_counts = buffer + num_axes;
-  const int* map = buffer + num_axes * 2;
+  const int* axes = buffer + num_axes * 2;
   CUDA_1D_KERNEL_LOOP(index, nthreads) {
     int from_index = index, to_index = 0;
-    for (int i = 0; i < num_axes; i++) {
-      from_inds[i] = from_index / from_counts[i];
-      from_index = from_index % from_counts[i];
+    for (int i = num_axes - 1; i >= 0; --i) {
+      from_inds[i] = from_index % from_counts[i];
+      from_index = from_index / from_counts[i];
     }
-    for (int i = 0; i < num_axes; i++) {
-      to_index += from_inds[map[i]] * to_counts[i];
+    for (int i = 0; i < num_axes - 1; i++) {
+      to_index = (to_index + from_inds[axes[i]]) * to_counts[i + 1];
     }
+    to_index += from_inds[axes[num_axes - 1]];
     to_data[to_index] = from_data[index];
   }
 }

@@ -42,7 +43,7 @@ bool TransposeOp<CUDAContext>::DoRunWithType() {
   int ndim = input.ndim();
   CAFFE_ENFORCE(count < std::numeric_limits<int>::max(),
                 "Transpose op on GPU only supports int32");
-  CAFFE_ENFORCE(count < COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS,
+  CAFFE_ENFORCE(ndim < COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS,
                 "Input ndim exceeds compile time max.");
   // Buffer contains the following data:
   // (1) the dimensions of the inputs
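A standalone sketch of the corrected index math above: the fixed kernel decomposes the flat source index into per-axis coordinates starting from the least significant axis, then recomposes the destination index over the permuted axes with Horner's scheme (here `dims` are the source dimensions and `out_dims` the permuted ones; this mirrors the kernel, it is not the kernel itself).

    void flat_to_coords(int flat, const int* dims, int ndim, int* coords) {
      for (int i = ndim - 1; i >= 0; --i) {
        coords[i] = flat % dims[i]; // coordinate along axis i
        flat /= dims[i];
      }
    }

    int coords_to_flat(const int* coords, const int* axes,
                       const int* out_dims, int ndim) {
      int flat = 0;
      for (int i = 0; i < ndim - 1; ++i) {
        flat = (flat + coords[axes[i]]) * out_dims[i + 1]; // Horner step
      }
      return flat + coords[axes[ndim - 1]];
    }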
@@ -45,7 +45,8 @@ class TransposeOp final : public Operator<Context> {
     }
     Y->Resize(new_dims_);
     // Do the actual transpose, which is implemented in DoRunWithType().
-    return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
+    return DispatchHelper<TensorTypes<float, double, int, long>>::call(
+        this, Input(0));
   }

  protected:
@@ -17,6 +17,7 @@ REGISTER_CPU_OPERATOR(ScatterAssign, ScatterAssignOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(Copy, CopyOp<CPUContext, CPUContext, CPUContext>);
 REGISTER_CPU_OPERATOR(Shape, ShapeOp<CPUContext>);
 REGISTER_CPU_OPERATOR(HasElements, HasElementsOp<CPUContext>);
+REGISTER_CPU_OPERATOR(IsEmpty, IsEmptyOp<CPUContext>);
 REGISTER_CPU_OPERATOR(Gather, GatherOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(Unique, UniqueOp<CPUContext>);
 REGISTER_CPU_OPERATOR(LengthsToSegmentIds, LengthsToSegmentIdsOp<CPUContext>);

@@ -24,6 +25,7 @@ REGISTER_CPU_OPERATOR(SegmentIdsToLengths, SegmentIdsToLengthsOp<CPUContext>);
 REGISTER_CPU_OPERATOR(Slice, SliceOp<int, CPUContext>);
 REGISTER_CPU_OPERATOR(Squeeze, SqueezeOp<CPUContext>);
 REGISTER_CPU_OPERATOR(ExpandDims, ExpandDimsOp<CPUContext>);
+REGISTER_CPU_OPERATOR(And, AndOp<CPUContext>);

 OPERATOR_SCHEMA(Print)
     .NumInputs(1)

@@ -209,6 +211,13 @@ OPERATOR_SCHEMA(HasElements)
         "has_elements",
         "Scalar bool tensor. True if input is not empty.");

+OPERATOR_SCHEMA(IsEmpty)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .SetDoc("Returns true iff the input tensor has size == 0")
+    .Input(0, "tensor", "Tensor of any type.")
+    .Output(0, "is_empty", "Scalar bool tensor. True if input is empty.");
+
 OPERATOR_SCHEMA(Gather)
     .NumInputs(2)
     .NumOutputs(1)

@@ -340,9 +349,21 @@ If the same blob is provided in input and output, the operation is copy-free.
     .Input(0, "data", "Original tensor")
     .Output(0, "expanded", "Reshaped tensor with same data as input.");

+OPERATOR_SCHEMA(And)
+    .NumInputs(2)
+    .NumOutputs(1)
+    .AllowInplace({{0, 0}})
+    .SetDoc(R"DOC(
+Outputs true iff both input blob values are true.
+)DOC")
+    .Input(0, "input_0", "first boolean input.")
+    .Input(1, "input_1", "second boolean input.")
+    .Output(0, "output", "input_0 && input_1.");
+
 SHOULD_NOT_DO_GRADIENT(Print);
 SHOULD_NOT_DO_GRADIENT(Shape);
 SHOULD_NOT_DO_GRADIENT(HasElements);
+SHOULD_NOT_DO_GRADIENT(IsEmpty);

 class GetSqueezeGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;

@@ -433,6 +454,7 @@ SHOULD_NOT_DO_GRADIENT(LengthsToSegmentIds);
 SHOULD_NOT_DO_GRADIENT(SegmentIdsToLengths);
 // TODO(azzolini): Add support for slice gradient
 SHOULD_NOT_DO_GRADIENT(Slice);
+SHOULD_NOT_DO_GRADIENT(And);

 } // namespace
@ -675,6 +675,21 @@ class HasElementsOp : public Operator<Context> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <class Context>
|
||||||
|
class IsEmptyOp : public Operator<Context> {
|
||||||
|
public:
|
||||||
|
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||||
|
USE_SIMPLE_CTOR_DTOR(IsEmptyOp);
|
||||||
|
|
||||||
|
bool RunOnDevice() override {
|
||||||
|
auto& input = Input(0);
|
||||||
|
auto* output = OperatorBase::Output<TensorCPU>(0);
|
||||||
|
output->Resize(std::vector<TIndex>{});
|
||||||
|
*output->template mutable_data<bool>() = (input.size() == 0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// RecordShapeOp records the shape of the input tensor to a vector of int. You
|
// RecordShapeOp records the shape of the input tensor to a vector of int. You
|
||||||
// mostly don't need this operator explicitly, and it is mostly used in the
|
// mostly don't need this operator explicitly, and it is mostly used in the
|
||||||
// autodiff process.
|
// autodiff process.
|
||||||
|
|
@@ -911,6 +926,23 @@ class UniqueOp : public Operator<Context> {
  public:
  OUTPUT_TAGS(UNIQUE, REMAPPING);
 };

+template <class Context>
+class AndOp final : public Operator<Context> {
+ public:
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+  AndOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws) {}
+
+  bool RunOnDevice() override {
+    const auto* i1 = Input(0).template data<bool>();
+    const auto* i2 = Input(1).template data<bool>();
+    auto* output = Output(0);
+    output->Resize(std::vector<int>{});
+    *output->template mutable_data<bool>() = (*i1 && *i2);
+    return true;
+  }
+};
+
 } // namespace caffe2

 #endif // CAFFE2_OPERATORS_UTILITY_OPS_H_
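For reference, a minimal sketch of driving the two new operators from the Python side, mirroring patterns used in the tests further below (blob names are illustrative, and producing bools via Cast follows the new test code):

    import numpy as np
    from caffe2.python import core, workspace

    # IsEmpty emits a scalar bool that is True iff the input has no elements.
    workspace.FeedBlob('data', np.array([], dtype=np.float32))
    workspace.RunOperatorOnce(
        core.CreateOperator('IsEmpty', ['data'], ['is_empty']))
    print(workspace.FetchBlob('is_empty'))  # True

    # And consumes two scalar bool blobs; the bools are produced via Cast,
    # the same way the new execution-step tests do it.
    workspace.RunOperatorOnce(
        core.CreateOperator('ConstantFill', [], ['one'], shape=[1], value=1.0))
    workspace.RunOperatorOnce(
        core.CreateOperator('Cast', ['one'], ['t'], to='bool'))
    workspace.RunOperatorOnce(
        core.CreateOperator('And', ['t', 't'], ['t_and_t']))
    print(workspace.FetchBlob('t_and_t'))  # [ True]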
@@ -199,7 +199,7 @@ message ExecutionStep {
   // Criteria network specifies a single output (TensorCPU<bool>) of
   // size (1), is run on every iteration by the executor, and
   // execution terminates when the output[0] is `false`.
-  optional string criteria_network = 5;
+  optional string criteria_network = 5 [deprecated=true];

   // If specified, run report_net asynchronously every `report_interval`
   // seconds. Report_net is guaranteed to run at least once after all

@@ -210,6 +210,20 @@ message ExecutionStep {
   // If false or not set, execute sub-steps serially.
   // If true, execute all substeps concurrently, each one in a separate thread.
   optional bool concurrent_substeps = 6;

+  // Name of a scalar boolean tensor.
+  // ES checks this blob AFTER every substep/subnet.
+  // If specified, and the value is true, then ES will skip the rest and return
+  // immediately.
+  // This means that the report_net and the first step will always be called.
+  // Use cases:
+  // 1) the first substep stops the rest if a data condition is not met
+  // 2) the first substep decides which of the remaining steps should be run.
+  // 3) external control
+  //
+  // ** It is the user's responsibility not to put this blob in race conditions.
+  // ** For example, when setting this blob in concurrent substeps
+  optional string should_stop_blob = 9;
 }

 message PlanDef {
@@ -87,7 +87,6 @@ const TypeMeta& NumpyTypeToCaffe(int numpy_type) {
       {NPY_UINT8, TypeMeta::Make<uint8_t>()},
       {NPY_UINT16, TypeMeta::Make<uint16_t>()},
       {NPY_OBJECT, TypeMeta::Make<std::string>()},
-      {NPY_STRING, TypeMeta::Make<std::string>()},
       // Note: Add more types here.
   };
   static TypeMeta unknown_type;
@@ -565,11 +564,10 @@ PyObject* FetchBlob(PyObject* self, PyObject* args) {

 PyObject* FeedBlob(PyObject* self, PyObject* args) {
   char* name_char;
-  PyArrayObject* array = nullptr;
+  PyObject* arg = nullptr;
   PyObject* device_option_string = nullptr;
-  // TODO(dzhulgakov): implement accepting other types (at least string)
-  if (!PyArg_ParseTuple(args, "sO!|O", &name_char, &PyArray_Type, &array,
-                        &device_option_string)) {
+  if (!PyArg_ParseTuple(
+          args, "sO|O", &name_char, &arg, &device_option_string)) {
     PyErr_SetString(PyExc_ValueError, "Incorrect arguments.");
     return nullptr;
   }

@@ -584,13 +582,25 @@ PyObject* FeedBlob(PyObject* self, PyObject* args) {
   }
   Blob* blob = gWorkspace->CreateBlob(name);

+  if (PyArray_Check(arg)) { // numpy array
+    PyArrayObject* array = reinterpret_cast<PyArrayObject*>(arg);
     auto feeder = CreateFeeder(option.device_type());
     if (!feeder) {
-      PyErr_SetString(PyExc_TypeError,
-                      "Unknown device type encountered in FeedBlob.");
+      PyErr_SetString(
+          PyExc_TypeError, "Unknown device type encountered in FeedBlob.");
       return nullptr;
     }
     return feeder->Feed(option, array, blob);
+  } else if (PyString_Check(arg)) { // string
+    *blob->GetMutable<std::string>() = PyBytesToStdString(arg);
+    Py_RETURN_TRUE;
+  } else {
+    PyErr_SetString(
+        PyExc_ValueError,
+        "Unexpected type of argument - only numpy array or string are "
+        "supported for feeding");
+    return nullptr;
+  }
 }

 // A simple macro to avoid writing repeated symbols.
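With this change, cc_FeedBlob accepts a Python string in addition to an ndarray; a sketch of the round trip, assuming the Python-level workspace wrappers pass non-array values straight through to cc_FeedBlob:

    from caffe2.python import workspace

    workspace.FeedBlob('greeting', 'hello caffe2')  # stored as std::string
    print(workspace.FetchBlob('greeting'))          # 'hello caffe2'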
@@ -620,7 +630,7 @@ PyMethodDef* GetCaffe2PythonMethods() {
       {"cc_RunPlan", RunPlan, METH_VARARGS, ""},
       _PYNAME(CreateBlob),
       _PYNAME(SerializeBlob),
-      _PYNAME(FetchBlob),
+      {"cc_FetchBlob", FetchBlob, METH_VARARGS, ""},
       {"cc_FeedBlob", FeedBlob, METH_VARARGS, ""},
       {nullptr, nullptr, 0, nullptr}, // end of python methods.
   };
@@ -160,7 +160,10 @@ class TensorFetcher : public BlobFetcherBase {
           Py_DECREF(outObj[j]);
         }
         Py_DECREF(array);
-        LOG(FATAL) << "Failed to allocate string for ndarray of strings.";
+        PyErr_SetString(
+            PyExc_TypeError,
+            "Failed to allocate string for ndarray of strings.");
+        return nullptr;
       }
     }
     return array;

@@ -217,21 +220,14 @@ class TensorFeeder : public BlobFeederBase {
           char* str;
           Py_ssize_t strSize;
           if (PyBytes_AsStringAndSize(input[i], &str, &strSize) == -1) {
-            LOG(FATAL) << "Unsupported pyhton object type passed into ndarray.";
+            PyErr_SetString(
+                PyExc_TypeError,
+                "Unsupported python object type passed into ndarray.");
+            return nullptr;
           }
           outPtr[i] = std::string(str, strSize);
         }
       } break;
-      case NPY_STRING: {
-        char* inputData = PyArray_BYTES(array);
-        auto* outPtr = tensor->template mutable_data<std::string>();
-        auto itemSize = PyArray_ITEMSIZE(array);
-        for (int i = 0; i < tensor->size(); ++i) {
-          auto start = inputData + i * itemSize;
-          auto end = std::find(start, start + itemSize, '\0');
-          outPtr[i] = std::string(start, end - start);
-        }
-      } break;
       default:
         context.template CopyBytes<CPUContext, Context>(
             tensor->size() * meta.itemsize(),
@@ -613,6 +613,7 @@ def GetArgumentParser():
     )
     parser.add_argument("--net_type", type=str, default="dag")
     parser.add_argument("--num_workers", type=int, default=2)
+    parser.add_argument("--use-nvtx", default=False, action='store_true')
     return parser

@@ -624,7 +625,9 @@ if __name__ == '__main__':
     ):
         GetArgumentParser().print_help()

-    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
+    workspace.GlobalInit(
+        ['caffe2', '--caffe2_log_level=0'] +
+        ['--caffe2_use_nvtx'] if args.use_nvtx else [])
     model_map = {
         'AlexNet': AlexNet,
         'OverFeat': OverFeat,
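One caveat worth noting about the added lines: in Python the conditional expression binds looser than `+`, so as written the whole argument list collapses to `[]` whenever args.use_nvtx is False. A sketch of the presumably intended grouping:

    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0'] +
        (['--caffe2_use_nvtx'] if args.use_nvtx else []))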
@@ -146,7 +146,7 @@ def ScopedBlobReference(name, *args, **kwargs):
     return BlobReference(scope.NAMESCOPE + name, *args, **kwargs)


-def _RectifyInputOutput(blobs):
+def _RectifyInputOutput(blobs, net=None):
     """A helper function to rectify the input or output of the CreateOperator
     interface.
     """

@@ -154,18 +154,18 @@ def _RectifyInputOutput(blobs):
         # If blobs is a single string, prepend scope.NAMESCOPE and put it as a
         # list.
         # TODO(jiayq): enforce using BlobReference instead of raw strings.
-        return [ScopedBlobReference(blobs)]
+        return [ScopedBlobReference(blobs, net=net)]
     elif type(blobs) is BlobReference:
         # If blob is a BlobReference, simply put it as a list.
-        return [BlobReference(str(blobs))]
+        return [blobs]
-    elif type(blobs) is list:
+    elif type(blobs) in (list, tuple):
         # If blob is a list, we go through it and type check.
         rectified = []
         for blob in blobs:
             if isinstance(blob, basestring):
-                rectified.append(ScopedBlobReference(blob))
+                rectified.append(ScopedBlobReference(blob, net=net))
             elif type(blob) is BlobReference:
-                rectified.append(BlobReference(str(blob)))
+                rectified.append(blob)
             else:
                 raise TypeError(
                     "I/O blob #{} of unsupported type: {} of type {}"
@@ -670,8 +670,19 @@ def get_op_ids_in_path(ssa, blob_versions, inputs, outputs):


 class Net(object):
+    _net_names_used = set()
     operator_registry_ = {}

+    @staticmethod
+    def _get_next_net_name(basename):
+        name = basename
+        next_idx = 1
+        while name in Net._net_names_used:
+            name = basename + '_' + str(next_idx)
+            next_idx += 1
+        Net._net_names_used |= set([name])
+        return name
+
     def __init__(self, name_or_proto):
         """
         Create a Net.
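The effect of the new class-level registry, sketched with an illustrative base name:

    from caffe2.python import core

    n1 = core.Net('train')
    n2 = core.Net('train')
    n3 = core.Net('train')
    print(n1, n2, n3)  # train train_1 train_2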
@@ -706,29 +717,29 @@ class Net(object):
             else:
                 self._next_name_index = 0
         else:
-            name = name_or_proto
             self._net = caffe2_pb2.NetDef()
-            self._net.name = name
+            self._net.name = name_or_proto
             self._next_name_index = 0

+        # make sure that this net name hasn't been used before
+        self._net.name = Net._get_next_net_name(self._net.name)
+
     def __str__(self):
         return self._net.name

-    def DefinesBlob(self, blob):
+    def BlobIsDefined(self, blob):
         """
         Returns true if the given BlobReference is produced as output of
         an operator in this net, or if it is provided as an external input.
         """
-        if isinstance(blob, BlobReference):
-            assert blob.Net() == self, 'Reference belongs to different net'
         blob_name = str(blob)
+        for input in self._net.external_input:
+            if input == blob_name:
+                return True
         for op in self._net.op:
             for output in op.output:
                 if output == blob_name:
                     return True
-        for input in self._net.external_input:
-            if input == blob_name:
-                return True
         return False

     def UsesBlob(self, blob):

@@ -753,7 +764,7 @@ class Net(object):
         raises KeyError.
         """
         blob_name = str(blob_name)
-        if not self.DefinesBlob(blob_name):
+        if not self.BlobIsDefined(blob_name):
             raise KeyError('Net does not define blob %s' % blob_name)
         return BlobReference(blob_name, self)
@@ -818,13 +829,16 @@ class Net(object):
             new_outputs: list of BlobReferences corresponding to the
                          outputs produced by new_net.
         """
-        inputs = inputs if isinstance(inputs, dict) else {i: i for i in inputs}
+        input_is_pair_list = isinstance(inputs, list) and all(
+            isinstance(i, tuple) and len(i) == 2 for i in inputs)
+        inputs = (
+            inputs if isinstance(inputs, (dict, OrderedDict)) else
+            OrderedDict(inputs) if input_is_pair_list else
+            OrderedDict(zip(inputs, inputs)))
+        for output in outputs:
+            assert self.BlobIsDefined(output)
         input_names = {str(k): str(v) for k, v in inputs.items()}
         output_names = [str(o) for o in outputs]
-        for input in inputs.keys():
-            assert self.UsesBlob(input)
-        for output in outputs:
-            assert self.DefinesBlob(output)
         proto = self._net
         ssa, blob_versions = get_ssa(proto)
         used_op_ids = get_op_ids_in_path(ssa, blob_versions, inputs, outputs)
@@ -859,9 +873,21 @@ class Net(object):
     def Proto(self):
         return self._net

-    def NextName(self):
+    def NextName(self, prefix=None, output_id=None):
         """Returns the next name to be used, if you do not want to explicitly
         name your blob."""
+        if prefix:
+            output_name_base = self._net.name + '/' + prefix
+            output_name = output_name_base
+            if output_id is not None:
+                output_name += ':' + str(output_id)
+            index = 2
+            while self.BlobIsDefined(output_name):
+                output_name = output_name_base + '_' + str(index)
+                if output_id is not None:
+                    output_name += ':' + str(output_id)
+                index += 1
+        else:
             output_name = self._net.name + '_blob_' + str(self._next_name_index)
             self._next_name_index += 1
         return str(output_name)
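A sketch of the names NextName now produces for a net called 'test' (assuming that name was still free and no blob collides):

    net = core.Net('test')
    print(net.NextName(prefix='Relu'))               # test/Relu
    print(net.NextName(prefix='Relu', output_id=0))  # test/Relu:0
    print(net.NextName())                            # test_blob_0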
@@ -900,16 +926,18 @@ class Net(object):
         self._net.op.extend(grad_ops)
         return input_to_grad

-    def AddExternalInput(self, input_name):
-        input_name = str(input_name)
+    def AddExternalInput(self, input):
+        input_name = str(input)
         assert input_name not in self._net.external_input, (
             'Net already contains an input named %s' % input_name)
         self._net.external_input.extend([input_name])
-        return BlobReference(input_name, self)
+        return (
+            input if isinstance(input, BlobReference)
+            else BlobReference(input_name))

     def AddExternalOutput(self, output):
         assert isinstance(output, BlobReference)
-        assert self.DefinesBlob(output)
+        assert self.BlobIsDefined(output)
         self.Proto().external_output.extend([str(output)])

     def DeduplicateGradientSlices(self, g):
@@ -931,14 +959,22 @@ class Net(object):
     def _CreateAndAddToSelf(self, op_type, inputs, outputs=None, **kwargs):
         """A helper function to create an operator and add it to self.
         """
+        inputs = _RectifyInputOutput(inputs)
+        for input in inputs:
+            if not self.BlobIsDefined(input):
+                assert input.Net() != self
+                self.AddExternalInput(input)
         if outputs is None:
             # If we do not specify an output, we will assume that this op
             # produces one output in this case.
-            outputs = self.NextName()
+            outputs = self.NextName(prefix=op_type)
         elif type(outputs) is int:
             # In this case, we will auto-fill the given number of outputs
             # with auto-generated names.
-            outputs = [self.NextName() for i in range(outputs)]
+            outputs = [
+                self.NextName(prefix=op_type, output_id=i)
+                for i in range(outputs)]
+        outputs = _RectifyInputOutput(outputs, net=self)
         op = CreateOperator(op_type, inputs, outputs, **kwargs)
         self._net.op.extend([op])
         if len(op.output) == 0:
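Combined with the BlobReference changes above, an output of one net can now be fed directly into another net, which registers it as an external input automatically; a sketch using standard fill/copy operators (the printed name assumes the auto-generated 'init/ConstantFill'):

    init = core.Net('init')
    w = init.ConstantFill([], shape=[10], value=1.0)

    train = core.Net('train')
    # w belongs to `init`, so _CreateAndAddToSelf adds it to
    # train's external_input before creating the operator.
    train.Copy(w, 'w_copy')
    print(list(train.Proto().external_input))  # ['init/ConstantFill']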
@@ -1036,10 +1072,11 @@ class ExecutionStep(object):
         self._assert_can_mutate()
         self._step.num_iter = num_iter

-    def SetCriteriaNet(self, criteria_net):
+    def SetShouldStopBlob(self, should_stop_blob):
+        assert isinstance(should_stop_blob, BlobReference), (
+            "expects BlobReference here, got {}".format(type(should_stop_blob)))
         self._assert_can_mutate()
-        _add_net_to_dict(self._net_dict, criteria_net)
-        self._step.criteria_network = get_net_name(criteria_net)
+        self._step.should_stop_blob = str(should_stop_blob)

     def SetReportNet(self, report_net, report_interval):
         self._assert_can_mutate()

@@ -1053,7 +1090,7 @@ class ExecutionStep(object):
         if isinstance(substep, ExecutionStep):
             substep._notify_is_used()
             if not substep.HasNets() and not substep.HasSubsteps():
-                return
+                return self
             for net in substep.Nets():
                 _add_net_to_dict(self._net_dict, net)
             self._substeps.append(substep)

@@ -1061,6 +1098,7 @@ class ExecutionStep(object):
         else:
             proto = substep
             self._step.substep.add().CopyFrom(proto)
+        return self

     def SetConcurrentSubsteps(self, concurrent_substeps):
         self._assert_can_mutate()

@@ -1073,6 +1111,7 @@ class ExecutionStep(object):
         assert isinstance(net, Net)
         _add_net_to_dict(self._net_dict, net)
         self._step.network.extend([get_net_name(net)])
+        return self


 class Plan(object):
@@ -1107,11 +1146,11 @@ class Plan(object):

 def execution_step(default_name,
                    steps_or_nets,
-                   criteria=None,
                    num_iter=None,
                    report_net=None,
                    report_interval=None,
-                   concurrent_substeps=None):
+                   concurrent_substeps=None,
+                   should_stop_blob=None):
     """
     Helper for creating an ExecutionStep.
     - steps_or_nets can be:

@@ -1120,18 +1159,20 @@ def execution_step(default_name,
       - ExecutionStep
       - list<Net>
       - list<ExecutionStep>
-    - criteria is either None or a Net
-    - if no criteria or num_iter is provided, defaults to num_iter=1
+    - should_stop_blob is either None or a scalar boolean blob.
+      - This blob is checked AFTER every substep/subnet.
+      - If specified and true, then this step will return immediately.
+      - Be sure to handle race conditions if setting from concurrent threads.
+    - if no should_stop_blob or num_iter is provided, defaults to num_iter=1
     """
-    assert criteria is None or isinstance(criteria, Net)
-    assert criteria is None or num_iter is None, (
-        'Cannot set both criteria and num_iter.')
-    if criteria is None and num_iter is None:
+    assert should_stop_blob is None or num_iter is None, (
+        'Cannot set both should_stop_blob and num_iter.')
+    if should_stop_blob is None and num_iter is None:
         num_iter = 1

-    def set_criteria(step):
-        if criteria is not None:
-            step.SetCriteriaNet(criteria)
+    def set_step_attr(step):
+        if should_stop_blob is not None:
+            step.SetShouldStopBlob(should_stop_blob)
         else:
             step.SetIter(num_iter)
         if concurrent_substeps is not None:
@@ -1144,18 +1185,20 @@ def execution_step(default_name,
     if not steps_or_nets:
         return ExecutionStep(default_name)
     if isinstance(steps_or_nets, ExecutionStep):
-        return set_criteria(steps_or_nets)
+        step = set_step_attr(ExecutionStep(default_name))
+        step.AddSubstep(steps_or_nets)
+        return step
     elif isinstance(steps_or_nets, Net):
-        step = set_criteria(ExecutionStep(default_name))
+        step = set_step_attr(ExecutionStep(default_name))
         step.AddNet(steps_or_nets)
         return step
     elif isinstance(steps_or_nets, list):
         if isinstance(steps_or_nets[0], Net):
-            step = set_criteria(ExecutionStep(default_name))
+            step = set_step_attr(ExecutionStep(default_name))
             map(step.AddNet, steps_or_nets)
             return step
         elif isinstance(steps_or_nets[0], ExecutionStep):
-            step = set_criteria(ExecutionStep(default_name))
+            step = set_step_attr(ExecutionStep(default_name))
             map(step.AddSubstep, steps_or_nets)
             return step
     else:
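Putting the proto field, the ExecutionStep plumbing, and the helper together: a data-driven loop without a criteria network, a sketch close to the rewritten test further below:

    import numpy as np
    from caffe2.python import core, workspace

    workspace.FeedBlob('iter', np.array([0], dtype=np.int32))
    workspace.FeedBlob('num_iters', np.array([10], dtype=np.int32))

    body = core.Net('body')
    body.Iter(['iter'], ['iter'])

    stop_net = core.Net('stop_net')
    stop_net.GE(['iter', 'num_iters'], ['stop'])
    stop_net.Proto().external_output.extend(['stop'])

    plan = core.Plan('plan')
    plan.AddStep(core.execution_step(
        'loop', [stop_net, body],
        should_stop_blob=core.BlobReference('stop')))
    workspace.RunPlan(plan)
    print(workspace.FetchBlob('iter'))  # [10]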
caffe2/python/dataset.py (new file, 276 lines)
@@ -0,0 +1,276 @@
"""
Implementation of an in-memory dataset with structured schema.

Use this to store and iterate through datasets with complex schema that
fit in memory.

Iterating through entries of this dataset is very fast since the dataset
is stored as a set of native Caffe2 tensors, thus no type conversion or
deserialization is necessary.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, workspace
from caffe2.python.io import Reader, Writer
from caffe2.python.schema import Struct
import numpy as np


class _DatasetReader(Reader):
    def __init__(self, field_names, field_blobs, cursor, name):
        """Don't call this directly. Instead, use dataset.reader()"""
        self.field_names = field_names
        self.field_blobs = field_blobs
        self.cursor = cursor
        self.name = name

    def read(self, read_net, batch_size=1):
        with core.NameScope(read_net.NextName(self.name)):
            fields = read_net.ReadNextBatch(
                [self.cursor] + self.field_blobs,
                self.field_names,
                batch_size=batch_size)
            return (read_net.IsEmpty([fields[0]]), fields)

    def reset(self, net):
        net.ResetCursor([self.cursor], [])


class _DatasetRandomReader(Reader):
    def __init__(self, field_names, field_blobs, cursor, name, indices):
        """Don't call this directly. Instead, use dataset.random_reader()"""
        self.field_names = field_names
        self.field_blobs = field_blobs
        self.cursor = cursor
        self.name = name
        self.indices = indices

    def reset(self, net):
        net.ResetCursor([self.cursor], [])

    def computeoffset(self, net):
        self.reset(net)
        offsets = net.ComputeOffset(
            [self.cursor] + self.field_blobs,
            'offsets')
        self.offsets = offsets

    def read(self, read_net, batch_size=1):
        fields = read_net.ReadRandomBatch(
            [self.cursor, self.indices, self.offsets] + self.field_blobs,
            self.field_names,
            batch_size=batch_size)
        return (read_net.IsEmpty([fields[0]]), fields)


class _DatasetWriter(Writer):
    def __init__(self, fields, field_blobs, init_net):
        """Don't call this directly. Use dataset.writer() instead."""
        self.fields = fields
        self.field_blobs = field_blobs
        self.mutex = init_net.CreateMutex([])

    def write(self, writer_net, fields):
        """
        Add operations to `net` that append the blobs in `fields` to the end
        of the dataset. An additional operator will also be added that checks
        the consistency of the data in `fields` against the dataset schema.

        Args:
            writer_net: The net that will contain the Append operators.
            fields: A list of BlobReference to be appended to this dataset.
        """
        assert len(fields) == len(self.fields), (
            'Expected %s fields, got %s.' % (len(self.fields), len(fields)))
        writer_net.CheckDatasetConsistency(fields, [], fields=self.fields)
        writer_net.AtomicAppend(
            [self.mutex] + list(self.field_blobs) + list(fields),
            self.field_blobs)

    def commit(self, finish_net):
        """Commit is a no-op for an in-memory dataset."""
        pass


def to_ndarray_list(values, schema):
    """
    Given a list of values and a dataset schema, produce a list of ndarrays in
    the right format.

    This function will perform some checks to make sure that the arrays
    produced have the right dtype and rank.
    """
    assert isinstance(schema, Struct), 'schema must be a Struct.'
    names = schema.field_names()
    types = schema.field_types()
    assert len(types) == len(values), (
        'Values must have %d elements, got %d' % (len(types), len(values)))

    arrays = []
    for value, dtype, name in zip(values, types, names):
        array = np.array(value, dtype=dtype.base)
        # if array is empty we may need to reshape a little
        if array.size == 0:
            array = array.reshape((0,) + dtype.shape)
        # check that the inner dimensions match the schema
        assert (array.shape[1:] == dtype.shape), (
            'Invalid array shape for field %s. Expected (%s), got (%s).' % (
                name,
                ', '.join(['_'] + map(str, dtype.shape)),
                ', '.join(map(str, array.shape))))
        arrays.append(array)
    return arrays


def Const(net, value, dtype=None, name=None):
    """
    Create a 'constant' by first creating an external input in the given
    net, and then feeding the corresponding blob with its provided value
    in the current workspace. The name is automatically generated in order
    to avoid clashes with existing blob names.
    """
    assert isinstance(net, core.Net), 'net must be a core.Net instance.'
    value = np.array(value, dtype=dtype)
    blob = net.AddExternalInput(net.NextName(prefix=name))
    workspace.FeedBlob(str(blob), value)
    return blob


class Dataset(object):
    """Represents an in-memory dataset with fixed schema.

    Use this to store and iterate through datasets with complex schema that
    fit in memory.

    Iterating through entries of this dataset is very fast since the dataset
    is stored as a set of native Caffe2 tensors, thus no type conversion or
    deserialization is necessary.
    """

    def __init__(self, fields, name=None):
        """Create an un-initialized dataset with schema provided by `fields`.

        Before this dataset can be used, it must be initialized, either by
        `init_empty` or `init_from_dataframe`.

        Args:
            fields: either a schema.Struct or a list of field names in a format
                    compatible with the one described in schema.py.
            name: optional name to prepend to blobs that will store the data.
        """
        assert isinstance(fields, list) or isinstance(fields, Struct), (
            'fields must be either a Struct or a list of raw field names.')
        self.schema = fields
        self.fields = (
            fields.field_names() if isinstance(fields, Struct) else fields)
        self.field_types = (
            fields.field_types() if isinstance(fields, Struct) else
            [np.dtype(np.void)] * len(self.fields))
        self.name = name or 'dataset'
        self.field_blobs = None

    def init_empty(self, init_net):
        """Initialize the blobs for this dataset with empty values.

        Empty arrays will be immediately fed into the current workspace,
        and `init_net` will take those blobs as external inputs.
        """
        self.field_blobs = [Const(init_net, [], name=f) for f in self.fields]

    def init_from_dataframe(self, net, dataframe):
        """Initialize the blobs for this dataset from a Pandas dataframe.

        Each column of the dataframe will be immediately fed into the current
        workspace, and the `net` will take these blobs as external inputs.
        """
        assert len(self.fields) == len(dataframe.columns)
        self.field_blobs = [
            Const(net, dataframe.as_matrix([col]).flatten(), name=field)
            for col, field in enumerate(self.fields)]

    def get_blobs(self):
        """
        Return the list of BlobReference pointing to the blobs that contain
        the data for this dataset.
        """
        assert self.field_blobs, 'Dataset not initialized.'
        return self.field_blobs

    def field_names(self):
        """Return the list of field names for this dataset."""
        return self.fields

    def field_types(self):
        """
        Return the list of field dtypes for this dataset.

        If a list of strings, not a schema.Struct, was passed to the
        constructor, this will return a list of dtype(np.void).
        """
        return self.field_types

    def reader(self, init_net, cursor_name=None):
        """Create a Reader object that is used to iterate through the dataset.

        This will append operations to `init_net` that create a TreeCursor,
        used to iterate through the data.

        NOTE: Currently, it is not safe to append to a dataset while reading.

        Args:
            init_net: net that will be run once to create the cursor.
            cursor_name: optional name for the blob containing a pointer
                         to the cursor.

        Returns:
            A _DatasetReader that can be used to create operators that will
            iterate through the dataset.
        """
        assert self.field_blobs, 'Dataset not initialized.'
        cursor_name = cursor_name or (self.name + '_cursor')
        cursor = init_net.CreateTreeCursor(
            [],
            [cursor_name],
            fields=self.fields)
        return _DatasetReader(
            self.fields, self.field_blobs, cursor, cursor_name)

    def random_reader(self, init_net, indices, cursor_name=None):
        """Create a Reader object that is used to iterate through the dataset.

        NOTE: The reader order depends on the order in indices.

        Args:
            Similar to reader
            indices: blob of reading order

        Returns:
            A _DatasetRandomReader that can be used to create operators that
            will iterate through the dataset according to indices.
        """
        assert self.field_blobs, 'Dataset not initialized.'
        cursor_name = cursor_name or (self.name + '_cursor')
        cursor = init_net.CreateTreeCursor(
            [],
            [cursor_name],
            fields=self.fields)
        return _DatasetRandomReader(
            self.fields, self.field_blobs, cursor, cursor_name, indices)

    def writer(self, init_net):
        """Create a Writer that can be used to append entries into the dataset.

        NOTE: Currently, it is not safe to append to a dataset
        while reading from it.
        NOTE: The current implementation of writer is not thread safe.
        TODO: fixme

        Args:
            init_net: net that will be run once in order to create the writer.
                      (currently not used)
        """
        assert self.field_blobs, 'Dataset not initialized.'
        return _DatasetWriter(self.fields, self.field_blobs, init_net)
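An end-to-end sketch of the new Dataset API, assembled from the docstrings above and the io.Reader.execution_step example further below (field names and values are illustrative):

    import numpy as np
    from caffe2.python import core, workspace, dataset

    ds = dataset.Dataset(['label', 'value'], name='demo')

    init_net = core.Net('init')
    ds.init_empty(init_net)
    writer = ds.writer(init_net)

    write_net = core.Net('write')
    labels = dataset.Const(write_net, np.array([1, 2], dtype=np.int32))
    values = dataset.Const(write_net, np.array([0.5, 0.9], dtype=np.float32))
    writer.write(write_net, [labels, values])

    # read everything back, one batch per loop iteration
    read_step, fields = ds.reader(init_net).execution_step('read')
    consume = core.Net('consume')
    consume.Print(fields[0], [])

    plan = core.Plan('demo_plan')
    plan.AddStep(core.execution_step('init', init_net))
    plan.AddStep(core.execution_step('write', write_net))
    plan.AddStep(read_step.AddNet(consume))
    workspace.RunPlan(plan)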
@@ -1,6 +1,6 @@
 import numpy as np
 import copy
-from caffe2.python import core, workspace
+from caffe2.python import workspace


 class DeviceChecker(object):

@@ -41,9 +41,8 @@ class DeviceChecker(object):
             op.device_option.CopyFrom(device_option)
             workspace.RunOperatorOnce(op)
             results.append(
-                [workspace.FetchBlob(op.output[idx]) for idx in outputs_to_check
-                 ]
-            )
+                [workspace.FetchBlob(op.output[idx])
+                 for idx in outputs_to_check])
             # Everything is done, reset the workspace.
             workspace.ResetWorkspace()
         # After running on all devices, check correctness

@@ -68,8 +67,8 @@ class DeviceChecker(object):
         return success

     def CheckNet(self, net, inputs={}, blobs_to_check=None, ignore=set()):
-        """Checks a network by inspecting all of its intermediate results, and see
-        if things match.
+        """Checks a network by inspecting all of its intermediate results, and
+        see if things match.
         """
         old_ws_name = workspace.CurrentWorkspace()
         results = []

@@ -78,7 +77,7 @@ class DeviceChecker(object):
         blobs_to_check = [b for b in blobs_to_check if b not in ignore]
         workspace.SwitchWorkspace("_device_check_", True)
         for i, device_option in enumerate(self._device_options):
-            for name, arr in inputs.iteritems():
+            for name, arr in inputs.items():
                 # print 'feeding', name
                 workspace.FeedBlob(name, arr, device_option)
             for op in net.op:

@@ -93,15 +92,18 @@ class DeviceChecker(object):
         for j in range(len(blobs_to_check)):
             x = results[i][j]
             y = results[0][j]
-            if np.any(np.abs(x - y) > self._threshold):
+            if not np.allclose(x, y,
+                               atol=self._threshold, rtol=self._threshold):
                 print('Failure in checking device option {}'
                       ' and output {}. The outputs are:'
                       .format(i, blobs_to_check[j]))
                 print(x.flatten())
                 print(y.flatten())
+                print(np.max(np.abs(x - y)))
                 success = False
             # else:
             #     print ('Passed device pair (%d, %d), %s %s: %s' %
-            #            (i, j, blobs_to_check[j], y.shape, str(y.flatten())))
+            #            (i, j, blobs_to_check[j], y.shape,
+            #             str(y.flatten())))
         workspace.SwitchWorkspace(old_ws_name)
         return success
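The move to np.allclose changes the tolerance semantics from a purely absolute bound to a mixed absolute/relative one; a small sketch of the difference:

    import numpy as np

    x = np.array([100.0, 0.001])
    y = np.array([100.4, 0.001])
    t = 5e-3
    # allclose passes when |x - y| <= atol + rtol * |y| elementwise:
    # here 0.4 <= 0.005 + 0.005 * 100.4, so large values get
    # proportionally more slack than small ones.
    print(np.allclose(x, y, atol=t, rtol=t))  # True
    print(np.any(np.abs(x - y) > t))          # True, i.e. the old test failed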
@@ -178,7 +178,12 @@ class TestOperators(hu.HypothesisTestCase):
         return st.sampled_from([np.float32, np.float64])

     _test_binary(
-        "Div", ref, filter_=non_zero, test_gradient=True, dtypes=div_dtypes
+        "Div", ref, filter_=non_zero, test_gradient=True,
+        dtypes=div_dtypes, gcs=hu.gcs_cpu_only
+    )(self)
+    _test_binary(
+        "Div", ref, filter_=non_zero, test_gradient=False,
+        dtypes=div_dtypes
     )(self)
     _test_binary_broadcast(
         "Div", ref, filter_=non_zero, dtypes=div_dtypes)(self)

@@ -269,8 +274,8 @@ class TestOperators(hu.HypothesisTestCase):
         for param, _ in enumerate(inputs):
             self.assertGradientChecks(gc, op, inputs, param, [0])

-    @unittest.skipIf(True,
-                     "Recurrent only works on CUDA 7.5 and above")
+    @unittest.skipIf(not workspace.has_gpu_support,
+                     "Skipping test due to no gpu present.")
     @given(hidden_size=st.integers(min_value=1, max_value=3),
            num_layers=st.integers(min_value=1, max_value=3),
            bidirectional=st.booleans(),

@@ -371,10 +376,10 @@ class TestOperators(hu.HypothesisTestCase):
            pad_l=st.integers(0, 3),
            pad_b=st.integers(0, 3),
            pad_r=st.integers(0, 3),
-           kernel=st.integers(1, 5),
-           size=st.integers(7, 10),
-           input_channels=st.integers(1, 8),
-           output_channels=st.integers(1, 8),
+           kernel=st.integers(3, 5),
+           size=st.integers(8, 8),
+           input_channels=st.integers(1, 3),
+           output_channels=st.integers(1, 3),
            batch_size=st.integers(1, 3),
            order=st.sampled_from(["NCHW", "NHWC"]),
            engine=st.sampled_from([""]),

@@ -661,7 +666,8 @@ class TestOperators(hu.HypothesisTestCase):
            output_channels=st.integers(1, 8),
            batch_size=st.integers(1, 3),
            order=st.sampled_from(["NCHW", "NHWC"]),
-           engine=st.sampled_from([""]), **hu.gcs)
+           engine=st.sampled_from(["", "CUDNN"]), **hu.gcs)
+    @settings(max_examples=2, timeout=100)
     def test_convolution_transpose_gradients(self, stride, pad, kernel,
                                              size, input_channels,
                                              output_channels, batch_size,

@@ -698,7 +704,7 @@ class TestOperators(hu.HypothesisTestCase):
            input_channels=st.integers(1, 8),
            output_channels=st.integers(1, 8),
            batch_size=st.integers(1, 3),
-           engine=st.sampled_from([""]), **hu.gcs)
+           engine=st.sampled_from(["", "CUDNN"]), **hu.gcs)
     def test_convolution_transpose_layout(self, stride, pad, kernel,
                                           size, input_channels,
                                           output_channels, batch_size,

@@ -1049,7 +1055,7 @@ class TestOperators(hu.HypothesisTestCase):
     @given(target_probabilities=hu.arrays(
         dims=[10], elements=st.floats(allow_nan=False,
                                       allow_infinity=False,
-                                      min_value=0,
+                                      min_value=0.01,
                                       max_value=1)),
            **hu.gcs)
     def test_perplexity(self, target_probabilities, gc, dc):

@@ -1477,9 +1483,13 @@ class TestOperators(hu.HypothesisTestCase):
         op = core.CreateOperator("HasElements", ["data"], ["has_elements"])
         self.assertReferenceChecks(gc, op, [data], lambda x: (len(x) > 0, ))

+        op = core.CreateOperator("IsEmpty", ["data"], ["is_empty"])
+        self.assertReferenceChecks(gc, op, [data], lambda x: (len(x) == 0, ))
+
     @given(initial_iters=st.integers(0, 100),
            max_iters=st.integers(0, 100))
-    def test_criteria_net_with_execution_step(self, initial_iters, max_iters):
+    def test_should_stop_as_criteria_net_execution_step(
+            self, initial_iters, max_iters):
         net = core.Net("net")
         net.Iter(["iter"], ["iter"])
         workspace.FeedBlob(

@@ -1487,16 +1497,87 @@ class TestOperators(hu.HypothesisTestCase):
         workspace.FeedBlob(
             "num_iters", np.asarray([max_iters]).astype(np.int32))
         criteria_net = core.Net("criteria")
-        criteria_net.LT(["iter", "num_iters"], ["continue"])
-        criteria_net.Proto().external_output.extend(["continue"])
+        criteria_net.GE(["iter", "num_iters"], ["stop"])
+        criteria_net.Proto().external_output.extend(["stop"])

         plan = core.Plan('plan')
-        plan.AddStep(core.execution_step('step', net, criteria=criteria_net))
+        plan.AddStep(core.execution_step(
+            'step', [criteria_net, net],
+            should_stop_blob=core.BlobReference("stop")))
         workspace.RunPlan(plan)
         iters = workspace.FetchBlob("iter")
         self.assertEqual(iters.dtype, np.int32)
         self.assertEqual(iters[0], max(initial_iters, max_iters))

+    def test_disabled_execution_step(self):
+        def createNets(i, disabled):
+            should_stop = 'should_stop_{}'.format(i)
+            output = 'output_{}'.format(i)
+
+            # init content and stop signal
+            init = core.Net("init_{}".format(i))
+            init.ConstantFill(
+                [],
+                [output],
+                shape=[1],
+                value=0.0
+            )
+            init.Cast([output], [should_stop], to='bool')
+
+            # decide if disabled or not
+            criterion = core.Net("criterion_{}".format(i))
+            tmp = criterion.ConstantFill(
+                [],
+                shape=[1],
+                value=1.0 if disabled else 0.0
+            )
+            criterion.Cast([tmp], [should_stop], to='bool')
+            criterion.Proto().external_output.extend([should_stop])
+
+            # the body net is just to turn a 0 blob to 1
+            net = core.Net("net_{}".format(i))
+            net.ConstantFill(
+                [],
+                [output],
+                shape=[1],
+                value=1.0
+            )
+
+            # always end the loop
+            ender = core.Net("ender_{}".format(i))
+            tmp = ender.ConstantFill(
+                [],
+                shape=[1],
+                value=1.0
+            )
+            ender.Cast([tmp], [should_stop], to='bool')
+            ender.Proto().external_output.extend([should_stop])
+
+            return [init, criterion, net, ender]
+
+        nets = [createNets(1, False),
+                createNets(2, True),
+                createNets(3, False)]
+        steps = [
+            core.execution_step(
+                'step_1', nets[0],
+                should_stop_blob=core.BlobReference('should_stop_1')),
+            core.execution_step(
+                'step_2', nets[1],
+                should_stop_blob=core.BlobReference('should_stop_2')),
+            core.execution_step('step_3', nets[2])
+        ]
+        expected = [1.0, 0.0, 1.0]
+
+        plan = core.Plan('plan')
+        plan.AddStep(core.execution_step('all_steps', steps, num_iter=3))
+        workspace.RunPlan(plan)
+
+        for i, net in enumerate(nets):
+            self.assertEqual(
+                workspace.FetchBlob('output_{}'.format(i + 1))[0],
+                expected[i])
+
     @given(initial_iters=st.integers(0, 100),
            num_iters=st.integers(0, 100))
     def test_iter_count_with_execution_step(self, initial_iters, num_iters):
@@ -1523,6 +1604,13 @@ class TestOperators(hu.HypothesisTestCase):
     def test_cast(self, a, src, dst, use_name, gc, dc):
         a = a.astype(src)

+        # Casting from a float type outside the range of the integral
+        # type is UB.
+        ftypes = [np.float32, np.float64]
+        if src in ftypes and dst not in ftypes and dst is not np.bool:
+            info = np.iinfo(dst)
+            a = np.clip(a, info.min, info.max)
+
         def ref(data):
             return [data.astype(dst)]
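A quick illustration of the undefined behavior that the clipping above avoids (the exact result is platform dependent):

    import numpy as np

    # 1e20 is far outside int32 range; the float-to-int conversion the Cast
    # operator performs in C++ is undefined for such values. numpy exposes
    # the same hazard, commonly wrapping to INT_MIN on x86.
    print(np.float32(1e20).astype(np.int32))  # e.g. -2147483648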
@@ -1571,7 +1659,8 @@ class TestOperators(hu.HypothesisTestCase):
         self.assertDeviceChecks(dc, op, [X], [0])
         self.assertGradientChecks(gc, op, [X], 0, [0])

-    @given(X=hu.tensor(), seed=st.integers(min_value=0, max_value=65536),
+    @given(X=_dtypes().flatmap(lambda dtype: hu.tensor(dtype=dtype)),
+           seed=st.integers(min_value=0, max_value=65536),
            null_axes=st.booleans(),
            **hu.gcs)
     def test_transpose(self, X, seed, null_axes, gc, dc):

@@ -1589,6 +1678,7 @@ class TestOperators(hu.HypothesisTestCase):

         self.assertReferenceChecks(gc, op, [X, axes],
                                    transpose_ref)
+        if X.dtype != np.int32 and X.dtype != np.int64:
             self.assertGradientChecks(gc, op, [X], 0, [0])

     @given(n=st.integers(1, 3),
@@ -263,7 +263,11 @@ class HypothesisTestCase(test_util.TestCase):
         outs = []
         for (n, ref) in zip(op.output, reference_outputs):
             output = workspace.FetchBlob(n)
-            np.testing.assert_allclose(output, ref, atol=1e-4, rtol=1e-4)
+            if output.dtype.kind in ('S', 'O'):
+                np.testing.assert_array_equal(output, ref)
+            else:
+                np.testing.assert_allclose(
+                    output, ref, atol=1e-4, rtol=1e-4)
             outs.append(output)
         if grad_reference and output_to_grad:
             self._assertGradReferenceChecks(
138
caffe2/python/io.py
Normal file
138
caffe2/python/io.py
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
"""
|
||||||
|
Defines the base interface for reading and writing operations.
|
||||||
|
|
||||||
|
Readers/Writers are objects that produce operations that read/write sequences
|
||||||
|
of data. Each operation reads or writes a list of BlobReferences.
|
||||||
|
|
||||||
|
Readers and Writers must be implemented such that read and write operations
|
||||||
|
are atomic and thread safe.
|
||||||
|
|
||||||
|
Examples of possible Readers and Writers:
|
||||||
|
HiveReader, HiveWriter,
|
||||||
|
QueueReader, QueueWriter,
|
||||||
|
DatasetReader, DatasetWriter,
|
||||||
|
        DBReader, DBWriter,

See `dataset.py` for an example of implementation.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core


class Reader(object):
    """
    Reader is an abstract class to be implemented in order to provide
    operations capable of iterating through a dataset or stream of data.

    A Reader must implement at least one operation, `read`, which
    adds operations to a net that read the next batch of data. Readers can
    optionally support the `reset` operation, which is useful when multiple
    passes over the data are required.
    """
    def read(self, read_net, batch_size=1, *args):
        """
        Add operations to `read_net` that will read the next batch of data
        and return a list of BlobReference representing the blobs that will
        contain the batches produced.

        Operations added to `read_net` must be thread safe and atomic, that
        is, it should be possible to clone `read_net` and run multiple
        instances of it in parallel.

        Args:
            read_net: the net that will be appended with read operations
            batch_size: number of entries to read

        Returns:
            A tuple (should_stop, fields), with:
                should_stop: BlobReference pointing to a boolean scalar
                             blob that indicates whether the read operation
                             was successful or whether the end of data has
                             been reached.
                fields: A tuple of BlobReference containing the latest batch
                        of data that was read.
        """
        raise NotImplementedError('Readers must implement `read`.')

    def reset(self, net):
        """Append operations to `net` that will reset the reader.

        This can be used to read the data multiple times.
        Not all readers support this operation.
        """
        raise NotImplementedError('This reader cannot be reset.')

    def execution_step(self, reader_net_name=None, batch_size=1):
        """Create an execution step with a net containing read operators.

        The execution step will contain a `stop_blob` that knows how to stop
        the execution loop when the end of data is reached.

        E.g.:

            read_step, fields = reader.execution_step()
            consume_net = core.Net('consume')
            consume_net.Print(fields[0], [])
            p = core.Plan('reader')
            p.AddStep(read_step.AddNet(consume_net))
            core.RunPlan(p)

        Args:
            reader_net_name: (optional) the name of the reader_net to be
                             created. The execution step will
                             be named accordingly.
            batch_size: the batch size

        Returns:
            A tuple (read_step, fields), with:
                read_step: A newly created execution step containing a net
                           with read operations. The step will have
                           `stop_blob` set, in order to stop the loop on
                           end of data.
                fields: A tuple of BlobReference containing the latest batch
                        of data that was read.
        """
        reader_net_name = reader_net_name or 'reader'
        reader_net = core.Net(reader_net_name)
        should_stop, fields = self.read(reader_net, batch_size=batch_size)
        read_step = core.execution_step(
            '{}_step'.format(reader_net_name),
            reader_net,
            should_stop_blob=should_stop)
        return (read_step, fields)


class Writer(object):
    """
    Writer is an abstract class to be implemented in order to provide
    operations capable of feeding a data stream or a dataset.

    A Writer must implement 2 operations:
    `write`, which adds operations to a net that write the next batch of
    data, and `commit`, which adds operations to a net in order to indicate
    that no more data will be written.
    """

    def write(self, writer_net, fields):
        """Add operations to `writer_net` that write the next batch of data.

        Operations added to the net must be thread-safe and unique, that is:
        multiple writers must be able to write to the dataset in parallel.

        Args:
            fields: a tuple of BlobReference containing the batch of data to
                    write.
        """
        raise NotImplementedError('Writers must implement write.')

    def commit(self, finish_net):
        """Add operations to `finish_net` that signal end of data.

        This must be implemented by all Writers, but may be a no-op for some
        of them.
        """
        raise NotImplementedError('Writers must implement commit.')
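To make the `read` contract concrete, here is a minimal sketch of a Reader
built on the CreateCounter/CountDown operators exercised later in this
commit; the constructor wiring and the net-method shorthand are assumptions
for illustration, not part of this module:

    class CounterReader(Reader):
        """Toy reader: emits a constant batch until a counter is exhausted."""

        def __init__(self, counter_blob):
            # counter_blob: output of a CreateCounter op run in some init
            # net (hypothetical setup, shown only to illustrate the contract)
            self._counter = counter_blob

        def read(self, read_net, batch_size=1):
            # CountDown's output becomes true once the count reaches zero,
            # matching the `should_stop` semantics documented above.
            should_stop = read_net.CountDown([self._counter])
            batch = read_net.ConstantFill([], shape=[batch_size], value=1.0)
            return should_stop, (batch,)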
@@ -116,7 +116,7 @@ class TestMiniAlexNet(test_util.TestCase):
         gpu_device = caffe2_pb2.DeviceOption()
         gpu_device.device_type = caffe2_pb2.CUDA
 
-        checker = device_checker.DeviceChecker(1e-2, [cpu_device, gpu_device])
+        checker = device_checker.DeviceChecker(0.05, [cpu_device, gpu_device])
         ret = checker.CheckNet(
             model.net.Proto(),
             inputs,
@@ -126,15 +126,16 @@ class TestMiniAlexNet(test_util.TestCase):
         )
         self.assertEqual(ret, True)
 
-    def testMiniAlexNet(self):
+    @unittest.skipIf(not workspace.has_gpu_support,
+                     "No GPU support. Skipping test.")
+    def testMiniAlexNetNCHW(self):
         self._testMiniAlexNet("NCHW")
+
+    @unittest.skipIf(not workspace.has_gpu_support,
+                     "No GPU support. Skipping test.")
+    def testMiniAlexNetNHWC(self):
         self._testMiniAlexNet("NHWC")
 
 
 if __name__ == '__main__':
-    if not workspace.has_gpu_support:
-        print('No GPU support. Skipping gpu test.')
-    elif workspace.NumCudaDevices() == 0:
-        print('No GPU device. Skipping gpu test.')
-    else:
-        unittest.main()
+    unittest.main()
@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
 import argparse
+import json
 from collections import defaultdict
 from caffe2.python import utils
 
@@ -53,6 +54,11 @@ def _rectify_operator_and_name(operators_or_net, name):
     return operators, name
 
 
+def _escape_label(name):
+    # json.dumps is poor man's escaping
+    return json.dumps(name)
+
+
 def GetPydotGraph(operators_or_net, name=None, rankdir='LR'):
     operators, name = _rectify_operator_and_name(operators_or_net, name)
     graph = pydot.Dot(name, rankdir=rankdir)
 
@@ -73,7 +79,7 @@ def GetPydotGraph(operators_or_net, name=None, rankdir='LR'):
         if input_name not in pydot_nodes:
             input_node = pydot.Node(
                 input_name + str(pydot_node_counts[input_name]),
-                label=input_name,
+                label=_escape_label(input_name),
                 **BLOB_STYLE
             )
             pydot_nodes[input_name] = input_node
 
@@ -87,7 +93,7 @@ def GetPydotGraph(operators_or_net, name=None, rankdir='LR'):
             pydot_node_counts[output_name] += 1
             output_node = pydot.Node(
                 output_name + str(pydot_node_counts[output_name]),
-                label=output_name,
+                label=_escape_label(output_name),
                 **BLOB_STYLE
             )
             pydot_nodes[output_name] = output_node
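The json.dumps escaping added here matters because pydot labels may contain
quotes or other characters with special meaning in dot syntax; for example:

    import json

    print(json.dumps('conv1/weights "shared"'))
    # "conv1/weights \"shared\""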
caffe2/python/operator_test/atomic_ops_test.py (new file)
@@ -0,0 +1,43 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase


class TestAtomicOps(TestCase):
    def test_atomic_ops(self):
        """
        Test that both countdown and checksum are updated atomically by
        having the countdown count from 20k to 0 across parallel workers,
        each feeding the fetched value into the checksum. If the operations
        are truly atomic, each value from 1 to 20k is fetched exactly once
        from the countdown and added exactly once to the checksum, so at
        the end the checksum must equal sum[i=1..20000](i).
        """
        init_net = core.Net('init')
        mutex_countdown = init_net.CreateMutex([])
        mutex_checksum = init_net.CreateMutex([])
        countdown = init_net.ConstantIntFill([], shape=[], value=20000.)
        checksum = init_net.ConstantIntFill([], shape=[], value=0.)
        minus_one = init_net.ConstantIntFill([], shape=[], value=-1.)
        steps = []
        for i in range(0, 100):
            net = core.Net('net:%d' % i)
            _, fetched_count = net.AtomicFetchAdd(
                [mutex_countdown, countdown, minus_one],
                [countdown, 'fetched_count:%d' % i])
            net.AtomicFetchAdd(
                [mutex_checksum, checksum, fetched_count],
                [checksum, 'not_used'])
            steps.append(
                core.execution_step('worker:%d' % i, net, num_iter=200))
        super_step = core.execution_step(
            'parent', steps, concurrent_substeps=True)
        plan = core.Plan('plan')
        plan.AddStep(core.execution_step('init', init_net))
        plan.AddStep(super_step)
        workspace.RunPlan(plan)
        # checksum = sum[i=1..20000](i) = 20000 * 20001 / 2 = 200010000
        self.assertEquals(workspace.FetchBlob(checksum), 200010000)
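The expected value in the final assertion is just the arithmetic series the
docstring describes; a quick check of that constant:

    expected = 20000 * (20000 + 1) // 2
    assert expected == 200010000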
caffe2/python/operator_test/counter_ops_test.py (new file)
@@ -0,0 +1,42 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase


class TestCounterOps(TestCase):
    def test_counter_ops(self):
        workspace.RunOperatorOnce(core.CreateOperator(
            'CreateCounter', [], ['c'], init_count=1))
        workspace.RunOperatorOnce(core.CreateOperator(
            'CountDown', ['c'], ['t1']))  # 1 -> 0
        assert not workspace.FetchBlob('t1')

        workspace.RunOperatorOnce(core.CreateOperator(
            'CountDown', ['c'], ['t2']))  # 0 -> 0
        assert workspace.FetchBlob('t2')

        workspace.RunOperatorOnce(core.CreateOperator(
            'ResetCounter', ['c'], [], init_count=1))  # -> 1
        workspace.RunOperatorOnce(core.CreateOperator(
            'CountDown', ['c'], ['t3']))  # 1 -> 0
        assert not workspace.FetchBlob('t3')

        workspace.RunOperatorOnce(core.CreateOperator(
            'ConstantBoolFill', [], ['t4'], value=0.0, shape=[]))
        assert workspace.FetchBlob('t4') == workspace.FetchBlob('t1')

        workspace.RunOperatorOnce(core.CreateOperator(
            'ConstantBoolFill', [], ['t5'], value=1.0, shape=[]))
        assert workspace.FetchBlob('t5') == workspace.FetchBlob('t2')

        assert workspace.RunOperatorOnce(core.CreateOperator(
            'And', ['t1', 't2'], ['t6']))
        assert not workspace.FetchBlob('t6')  # True && False

        assert workspace.RunOperatorOnce(core.CreateOperator(
            'And', ['t2', 't5'], ['t7']))
        assert workspace.FetchBlob('t7')  # True && True
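These counter ops compose with `should_stop_blob` in execution steps, which
is how bounded loops are expressed in a plan. A sketch under the semantics
demonstrated above (CountDown's output turning true once the count is
exhausted); the net-method shorthand mirrors the CreateMutex/AtomicFetchAdd
usage in atomic_ops_test.py:

    from caffe2.python import core

    init_net = core.Net('init')
    counter = init_net.CreateCounter([], init_count=5)

    loop_net = core.Net('loop')
    done = loop_net.CountDown([counter])  # flips to True after 5 iterations

    # The step re-runs loop_net until `done` becomes true.
    step = core.execution_step('bounded_loop', loop_net, should_stop_blob=done)
    plan = core.Plan('counter_demo')
    plan.AddStep(core.execution_step('init', init_net))
    plan.AddStep(step)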
caffe2/python/operator_test/cross_entropy_ops_test.py (new file)
@@ -0,0 +1,71 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def sigmoid_cross_entropy_with_logits(x, z):
    return np.maximum(x, 0) - x * z + np.log(1 + np.exp(-np.abs(x)))


def sigmoid_cross_entropy_with_logits_grad(x, z):
    return z - sigmoid(x)


class TestCrossEntropyOps(hu.HypothesisTestCase):
    @given(
        inputs=st.lists(
            elements=st.integers(min_value=1, max_value=5),
            min_size=1,
            max_size=2,
            average_size=2,
        ).flatmap(
            lambda shape: st.tuples(
                hu.arrays(
                    dims=shape,
                    elements=st.one_of(
                        st.floats(min_value=-1.0, max_value=-0.1),
                        st.floats(min_value=0.1, max_value=1.0),
                    )),
                hu.arrays(
                    dims=shape,
                    elements=st.sampled_from([0.0, 1.0]),
                ),
            )
        ),
    )
    def test_sigmoid_cross_entropy_with_logits(self, inputs):
        logits, targets = inputs

        def sigmoid_xentr_logit_ref(logits, targets):
            s = sigmoid_cross_entropy_with_logits(logits, targets)
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            m = fwd_targets - sigmoid(fwd_logits)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None)

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [logits, targets],
            sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)
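The reference function above is the standard numerically stable rewriting of
the sigmoid cross-entropy; in LaTeX, for a logit $x$ and target $z$:

    $$-z\log\sigma(x) - (1-z)\log\bigl(1-\sigma(x)\bigr)
        = \max(x, 0) - xz + \log\bigl(1 + e^{-|x|}\bigr)$$

which avoids overflowing $e^{x}$ for large $|x|$.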
caffe2/python/operator_test/dataset_ops_test.py (new file)
@@ -0,0 +1,284 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from caffe2.python import core, workspace, dataset
from caffe2.python.dataset import Const
from caffe2.python.schema import List, Struct, Scalar, Map
from caffe2.python.test_util import TestCase


def _assert_arrays_equal(actual, ref, err_msg):
    if ref.dtype.kind in ('S', 'O'):
        np.testing.assert_array_equal(actual, ref, err_msg=err_msg)
    else:
        np.testing.assert_allclose(
            actual, ref, atol=1e-4, rtol=1e-4, err_msg=err_msg)


class TestDatasetOps(TestCase):
    def test_dataset_ops(self):
        """
        1. Defining the schema of our dataset.

        This example schema could represent, for example, a search query log.
        """
        schema = Struct(
            # fixed size vector, which will be stored as a matrix when batched
            ('dense', Scalar((np.float32, 3))),
            # could represent a feature map from feature ID to float value
            ('floats', Map(
                Scalar(np.int32),
                Scalar(np.float32))),
            # could represent a multi-valued categorical feature map
            ('int_lists', Map(
                Scalar(np.int32),
                List(Scalar(np.int64)),
            )),
            # could represent a multi-valued, weighted categorical feature map
            ('id_score_pairs', Map(
                Scalar(np.int32),
                Map(
                    Scalar(np.int64),
                    Scalar(np.float32),
                    keys_name='ids',
                    values_name='scores'),
            )),
            # additional scalar information
            ('metadata', Struct(
                ('user_id', Scalar(np.int64)),
                ('user_embed', Scalar((np.float32, 2))),
                ('query', Scalar(str)),
            )),
        )

        """
        This is what the flattened fields for this schema look like, along
        with their types. Each one of these fields will be stored, read and
        written as a tensor.
        """
        expected_fields = [
            ('dense', (np.float32, 3)),
            ('floats:lengths', np.int32),
            ('floats:values:keys', np.int32),
            ('floats:values:values', np.float32),
            ('int_lists:lengths', np.int32),
            ('int_lists:values:keys', np.int32),
            ('int_lists:values:values:lengths', np.int32),
            ('int_lists:values:values:values', np.int64),
            ('id_score_pairs:lengths', np.int32),
            ('id_score_pairs:values:keys', np.int32),
            ('id_score_pairs:values:values:lengths', np.int32),
            ('id_score_pairs:values:values:values:ids', np.int64),
            ('id_score_pairs:values:values:values:scores', np.float32),
            ('metadata:user_id', np.int64),
            ('metadata:user_embed', (np.float32, 2)),
            ('metadata:query', str),
        ]
        zipped = zip(
            expected_fields,
            schema.field_names(),
            schema.field_types())
        for (ref_name, ref_type), name, dtype in zipped:
            self.assertEquals(ref_name, name)
            self.assertEquals(np.dtype(ref_type), dtype)

        """
        2. The contents of our dataset.

        Contents as defined below could represent, for example, a log of
        search queries along with dense, sparse features and metadata.
        The dataset below has 3 top-level entries.
        """
        contents_raw = [
            # dense
            [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3], [3.1, 3.2, 3.3]],
            # floats
            [1, 2, 3],  # len
            [11, 21, 22, 31, 32, 33],  # key
            [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
            # int lists
            [2, 0, 2],  # len
            [11, 12, 31, 32],  # key
            [2, 4, 3, 1],  # value:len
            [111, 112, 121, 122, 123, 124, 311, 312, 313, 321],  # value:value
            # id score pairs
            [1, 2, 2],  # len
            [11, 21, 22, 31, 32],  # key
            [1, 1, 2, 2, 3],  # value:len
            [111, 211, 221, 222, 311, 312, 321, 322, 323],  # value:ids
            [11.1, 21.1, 22.1, 22.2, 31.1, 31.2, 32.1, 32.2, 32.3],  # val:score
            # metadata
            [123, 234, 456],  # user_id
            [[0.2, 0.8], [0.5, 0.5], [0.7, 0.3]],  # user_embed
            ['dog posts', 'friends who like to', 'posts about ca'],  # query
        ]
        # convert the above content to ndarrays, checking against the schema
        contents = dataset.to_ndarray_list(contents_raw, schema)

        """
        3. Creating and appending to the dataset.
        We first create an empty dataset with the given schema.
        Then, a Writer is used to append these entries to the dataset.
        """
        ds = dataset.Dataset(schema)
        net = core.Net('init')
        ds.init_empty(net)

        blobs_to_append = [Const(net, c) for c in contents]
        writer = ds.writer(init_net=net)
        writer.write(net, blobs_to_append)
        workspace.RunNetOnce(net)

        """
        4. Iterating through the dataset contents.

        If we were to iterate through the top level entries of our dataset,
        this is what we should expect to see:
        """
        entries_raw = [
            (
                [[1.1, 1.2, 1.3]],  # dense
                [1], [11], [1.1],  # floats
                [2], [11, 12], [2, 4], [111, 112, 121, 122, 123, 124],  # intlst
                [1], [11], [1], [111], [11.1],  # id score pairs
                [123], [[0.2, 0.8]], ['dog posts'],  # metadata
            ),
            (
                [[2.1, 2.2, 2.3]],  # dense
                [2], [21, 22], [2.1, 2.2],  # floats
                [0], [], [], [],  # int list
                [2], [21, 22], [1, 2], [211, 221, 222], [21.1, 22.1, 22.2],
                [234], [[0.5, 0.5]], ['friends who like to'],  # metadata
            ),
            (
                [[3.1, 3.2, 3.3]],  # dense
                [3], [31, 32, 33], [3.1, 3.2, 3.3],  # floats
                [2], [31, 32], [3, 1], [311, 312, 313, 321],  # int lst
                [2], [31, 32], [2, 3], [311, 312, 321, 322, 323],
                [31.1, 31.2, 32.1, 32.2, 32.3],  # id score list
                [456], [[0.7, 0.3]], ['posts about ca'],  # metadata
            ),
            # after the end of the dataset, we will keep getting empty vectors
            ([],) * 16,
            ([],) * 16,
        ]
        entries = [dataset.to_ndarray_list(e, schema) for e in entries_raw]

        """
        Let's go ahead and create the reading nets.
        We will run the `read` net multiple times and assert that we are
        reading the entries the way we stated above.
        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')
        reader = ds.reader(read_init_net)
        should_continue, batch_blobs = reader.read(read_next_net)

        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net)
        read_next_net_name = str(read_next_net)

        for i, entry in enumerate(entries):
            workspace.RunNet(read_next_net_name)
            for name, blob, base in zip(ds.field_names(), batch_blobs, entry):
                data = workspace.FetchBlob(str(blob))
                _assert_arrays_equal(
                    data, base,
                    err_msg='Mismatch in entry %d, field %s' % (i, name))

        """
        5. Reading/writing in a single plan

        If all operations on the data are expressible as Caffe2 operators,
        we don't need to load the data into Python: we can iterate through
        the dataset in a single Plan.

        Here we will process the dataset a little and store it in a second
        dataset. We can reuse the same Reader since it supports reset.
        """
        reset_net = core.Net('reset_net')
        reader.reset(reset_net)
        read_step, fields = reader.execution_step()

        """ We will add the line number * 1000 to the feature ids. """
        process_net = core.Net('process')
        line_no = Const(process_net, 0, dtype=np.int32)
        const_one = Const(process_net, 1000, dtype=np.int32)
        process_net.Add([line_no, const_one], [line_no])
        fid = schema.floats.values.keys.id()
        process_net.Print(fields[fid], [])
        process_net.Add([fields[fid], line_no], fields[fid], broadcast=1)

        """ Let's create a second dataset and append to it. """
        ds2 = dataset.Dataset(schema, name='dataset2')
        ds2.init_empty(reset_net)
        writer = ds2.writer(reset_net)
        writer.write(process_net, fields)
        # commit is not necessary for DatasetWriter, but we add it for
        # generality of the example
        commit_net = core.Net('commit')
        writer.commit(commit_net)

        """ Time to create and run a plan which will do the processing """
        plan = core.Plan('process')
        plan.AddStep(core.execution_step('reset', reset_net))
        plan.AddStep(read_step.AddNet(process_net))
        plan.AddStep(core.execution_step('commit', commit_net))
        workspace.RunPlan(plan)

        """
        Now we should have dataset2 populated.
        """
        ds2blobs = ds2.get_blobs()
        for i, (name, blob) in enumerate(zip(schema.field_names(), ds2blobs)):
            data = workspace.FetchBlob(str(blob))
            content = contents[i]
            if i == fid:
                # one of our fields has been added with line numbers * 1000
                content += [1000, 2000, 2000, 3000, 3000, 3000]
            _assert_arrays_equal(
                data, contents[i], err_msg='Mismatch in field %s.' % name)

        """
        6. Slicing a dataset

        You can create a new schema from pieces of another schema and reuse
        the same data.
        """
        subschema = Struct(('top_level', schema.int_lists.values))
        int_list_contents = contents[schema.int_lists.values.slice()]
        self.assertEquals(len(subschema.field_names()), len(int_list_contents))

        """
        7. Random access to a dataset
        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        idx = np.array([2, 1, 0])
        workspace.FeedBlob('idx', idx)

        reader = ds.random_reader(read_init_net, 'idx')
        reader.computeoffset(read_init_net)

        should_continue, batch_blobs = reader.read(read_next_net)

        workspace.CreateNet(read_init_net)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net)
        read_next_net_name = str(read_next_net)

        for i in range(len(entries)):
            k = idx[i] if i in idx else i
            entry = entries[k]
            workspace.RunNet(read_next_net_name)
            for name, blob, base in zip(ds.field_names(), batch_blobs, entry):
                data = workspace.FetchBlob(str(blob))
                _assert_arrays_equal(
                    data, base,
                    err_msg='Mismatch in entry %d, field %s' % (i, name))
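The `id()` call in step 5 resolves a scalar's position in the flattened
field list, so the same index works for `field_names()`, `field_types()`,
and the blob tuples returned by readers. With the schema above:

    fid = schema.floats.values.keys.id()
    assert schema.field_names()[fid] == 'floats:values:keys'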
@@ -2,9 +2,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
-import numpy as np
 from caffe2.python import core, workspace
 from caffe2.python.test_util import TestCase
+import numpy as np
 
 
 class TestIndexOps(TestCase):
@@ -47,6 +47,13 @@ class TestIndexOps(TestCase):
         result2 = workspace.FetchBlob('result2')
         np.testing.assert_array_equal([0, 5, 1, 0, 0], result2)
 
+        workspace.RunOperatorOnce(core.CreateOperator(
+            'IndexSize',
+            ['index'],
+            ['index_size']))
+        size = workspace.FetchBlob('index_size')
+        self.assertEquals(size, 6)
+
         workspace.RunOperatorOnce(core.CreateOperator(
             'IndexStore',
             ['index'],
@@ -55,3 +62,21 @@ class TestIndexOps(TestCase):
         new_entries = np.array(['new_entry1', 'new_entry2'], dtype=str)
         np.testing.assert_array_equal(
             np.concatenate((entries, new_entries)), stored_actual)
+
+        workspace.RunOperatorOnce(core.CreateOperator(
+            'StringIndexCreate',
+            [],
+            ['index2']))
+
+        workspace.RunOperatorOnce(core.CreateOperator(
+            'IndexLoad',
+            ['index2', 'stored_entries'],
+            [],
+            skip_first_entry=1))
+
+        workspace.RunOperatorOnce(core.CreateOperator(
+            'IndexSize',
+            ['index2'],
+            ['index2_size']))
+        index2_size = workspace.FetchBlob('index2_size')
+        self.assertEquals(index2_size, 5)
caffe2/python/operator_test/one_hot_ops_test.py (new file)
@@ -0,0 +1,76 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np


def _one_hots():
    index_size = st.integers(min_value=1, max_value=5)
    lengths = st.lists(
        elements=st.integers(min_value=0, max_value=5))
    return st.tuples(index_size, lengths).flatmap(
        lambda x: st.tuples(
            st.just(x[0]),
            st.just(x[1]),
            st.lists(
                elements=st.integers(min_value=0, max_value=x[0] - 1),
                min_size=sum(x[1]),
                max_size=sum(x[1]))))


class TestOneHotOps(hu.HypothesisTestCase):
    @given(
        hot_indices=hu.tensor(
            min_dim=1, max_dim=1, dtype=np.int64,
            elements=st.integers(min_value=0, max_value=42)),
        end_padding=st.integers(min_value=0, max_value=2))
    def test_one_hot(self, hot_indices, end_padding):

        def one_hot_ref(hot_indices, size):
            out = np.zeros([len(hot_indices), size], dtype=float)
            for i, x in enumerate(hot_indices):
                out[i, x] = 1.
            return (out, )

        size = np.array(max(hot_indices) + end_padding + 1, dtype=np.int64)
        if size == 0:
            size = 1
        op = core.CreateOperator('OneHot', ['hot_indices', 'size'], ['output'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [hot_indices, size],
            one_hot_ref)

    @given(hot_indices=_one_hots())
    def test_segment_one_hot(self, hot_indices):
        index_size, lengths, indices = hot_indices

        index_size = np.array(index_size, dtype=np.int64)
        lengths = np.array(lengths, dtype=np.int32)
        indices = np.array(indices, dtype=np.int64)

        def segment_one_hot_ref(lengths, hot_indices, size):
            offset = 0
            out = np.zeros([len(lengths), size], dtype=float)
            for i, length in enumerate(lengths):
                for idx in hot_indices[offset:offset + length]:
                    out[i, idx] = 1.
                offset += length
            return (out, )

        op = core.CreateOperator(
            'SegmentOneHot',
            ['lengths', 'hot_indices', 'size'],
            ['output'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [lengths, indices, index_size],
            segment_one_hot_ref)
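For concreteness, the dense layout `one_hot_ref` produces, recomputed with
plain NumPy indexing:

    import numpy as np

    hot_indices = np.array([0, 2, 1], dtype=np.int64)
    size = 3
    out = np.zeros([len(hot_indices), size])
    out[np.arange(len(hot_indices)), hot_indices] = 1.
    # out == [[1., 0., 0.],
    #         [0., 0., 1.],
    #         [0., 1., 0.]]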
@@ -83,6 +83,10 @@ def logsumexp_grad(grad_out, outputs, inputs):
         axis=0) * np.exp(inputs[0])
 
 
+def logmeanexp(x):
+    return np.log(np.mean(np.exp(x), axis=0))
+
+
 def mean(x):
     return np.mean(x, axis=0)
 
@@ -94,6 +98,30 @@ def mean_grad(grad_out, outputs, inputs):
         axis=0)
 
 
+def max(x):
+    return np.amax(x, axis=0)
+
+
+def max_grad(grad_out, outputs, inputs):
+    flat_inputs = inputs[0].flatten()
+    flat_outputs = np.array(outputs[0]).flatten()
+    flat_grad_in = np.zeros(flat_inputs.shape)
+    flat_grad_out = np.array(grad_out).flatten()
+    blocks = inputs[0].shape[0]
+    block_size = flat_inputs.shape[0] // blocks
+
+    for i in range(block_size):
+        out_grad = flat_grad_out[i]
+        out = flat_outputs[i]
+        for j in range(blocks):
+            idx = j * block_size + i
+            if out == flat_inputs[idx]:
+                flat_grad_in[idx] = out_grad
+                break
+
+    return np.resize(flat_grad_in, inputs[0].shape)
+
+
 REFERENCES_ALL = [
     ('Sum', partial(np.sum, axis=0), sum_grad),
 ]
@@ -101,7 +129,10 @@ REFERENCES_ALL = [
 REFERENCES_SORTED = [
     ('RangeSum', partial(np.sum, axis=0), sum_grad),
     ('RangeLogSumExp', logsumexp, logsumexp_grad),
+    # gradient is the same as sum
+    ('RangeLogMeanExp', logmeanexp, logsumexp_grad),
     ('RangeMean', mean, mean_grad),
+    ('RangeMax', max, max_grad),
 ]
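RangeLogMeanExp can reuse `logsumexp_grad` because log-mean-exp differs from
log-sum-exp only by a constant, so their derivatives coincide:

    $$\log\frac{1}{n}\sum_{i=1}^{n} e^{x_i}
        = \log\sum_{i=1}^{n} e^{x_i} - \log n
    \quad\Longrightarrow\quad
    \frac{\partial}{\partial x_j}\,\mathrm{logmeanexp}(x)
        = \frac{e^{x_j}}{\sum_i e^{x_i}}$$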
@@ -185,3 +185,37 @@ class TestSequenceOps(hu.HypothesisTestCase):
             op,
             [padded_data, padded_lengths],
             partial(_gather_padding_ref, start_pad_width, end_pad_width))
+
+    @given(data=hu.tensor(min_dim=3, max_dim=3, dtype=np.float32,
+                          elements=st.floats(min_value=-np.inf,
+                                             max_value=np.inf),
+                          min_value=1, max_value=10),
+           **hu.gcs_cpu_only)
+    def test_reverse_packed_segs(self, data, gc, dc):
+        max_length = data.shape[0]
+        batch_size = data.shape[1]
+        lengths = np.random.randint(max_length + 1, size=batch_size)
+
+        op = core.CreateOperator(
+            "ReversePackedSegs",
+            ["data", "lengths"],
+            ["reversed_data"])
+
+        def op_ref(data, lengths):
+            rev_data = np.array(data, copy=True)
+            for i in range(batch_size):
+                seg_length = lengths[i]
+                for j in range(seg_length):
+                    rev_data[j][i] = data[seg_length - 1 - j][i]
+            return (rev_data,)
+
+        def op_grad_ref(grad_out, outputs, inputs):
+            return op_ref(grad_out, inputs[1]) + (None,)
+
+        self.assertReferenceChecks(
+            device_option=gc,
+            op=op,
+            inputs=[data, lengths],
+            reference=op_ref,
+            output_to_grad='reversed_data',
+            grad_reference=op_grad_ref)
caffe2/python/operator_test/string_ops_test.py (new file)
@@ -0,0 +1,106 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np


def _string_lists(alphabet=None):
    return st.lists(
        elements=st.text(alphabet=alphabet, average_size=3),
        min_size=0,
        max_size=3)


class TestStringOps(hu.HypothesisTestCase):
    @given(strings=_string_lists())
    def test_string_prefix(self, strings):
        length = 3
        # although we are utf-8 encoding below to avoid python exceptions,
        # StringPrefix op deals with byte-length prefixes, which may produce
        # an invalid utf-8 string. The goal here is just to avoid python
        # complaining about the unicode -> str conversion.
        strings = np.array(
            map(lambda a: a.encode('utf-8'), strings), dtype=np.object)

        def string_prefix_ref(strings):
            return (
                np.array(map(lambda a: a[:length], strings), dtype=object), )

        op = core.CreateOperator(
            'StringPrefix',
            ['strings'],
            ['stripped'],
            length=length)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_prefix_ref)

    @given(strings=_string_lists())
    def test_string_suffix(self, strings):
        length = 3
        strings = np.array(
            map(lambda a: a.encode('utf-8'), strings), dtype=np.object)

        def string_suffix_ref(strings):
            return (
                np.array(map(lambda a: a[-length:], strings), dtype=object), )

        op = core.CreateOperator(
            'StringSuffix',
            ['strings'],
            ['stripped'],
            length=length)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_suffix_ref)

    @given(strings=st.text(alphabet=['a', 'b'], average_size=3))
    def test_string_starts_with(self, strings):
        prefix = 'a'
        strings = np.array(
            map(lambda a: str(strings), strings), dtype=np.object)

        def string_starts_with_ref(strings):
            return (np.array(
                map(lambda a: a.startswith(prefix), strings), dtype=bool), )

        op = core.CreateOperator(
            'StringStartsWith',
            ['strings'],
            ['bools'],
            prefix=prefix)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_starts_with_ref)

    @given(strings=st.text(alphabet=['a', 'b'], average_size=3))
    def test_string_ends_with(self, strings):
        suffix = 'a'
        strings = np.array(
            map(lambda a: str(strings), strings), dtype=np.object)

        def string_ends_with_ref(strings):
            return (np.array(
                map(lambda a: a.endswith(suffix), strings), dtype=bool), )

        op = core.CreateOperator(
            'StringEndsWith',
            ['strings'],
            ['bools'],
            suffix=suffix)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_ends_with_ref)
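Note that these tests lean on Python 2's `map` returning a list; under
Python 3, `map` returns an iterator and `np.array(map(...))` would produce
a 0-d object array instead of a string vector. A Python 3 friendly
equivalent of the conversion would be:

    strings = np.array(
        [a.encode('utf-8') for a in strings], dtype=np.object)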
caffe2/python/schema.py (new file)
@@ -0,0 +1,348 @@
"""
Defines a minimal set of data types that allow representing datasets with
arbitrary nested structure, including objects of variable length, such as
maps and lists.

This defines a columnar storage format for such datasets on top of caffe2
tensors. In terms of capacity of representation, it can represent most of
the data types supported by the Parquet, ORC and DWRF file formats.

See comments in operator_test/dataset_ops_test.py for an example and
walkthrough on how to use schema to store and iterate through a structured
in-memory dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import OrderedDict
import logging
import numpy as np

logger = logging.getLogger(__name__)


def _join_field_name(prefix, suffix):
    if prefix and suffix:
        return '{}:{}'.format(prefix, suffix)
    elif prefix:
        return prefix
    elif suffix:
        return suffix
    else:
        return ''


class Field(object):
    """Represents an abstract field type in a dataset.
    """
    def __init__(self, children):
        """Derived classes must call this after their initialization."""
        self._parent = (None, 0)
        offset = 0
        self._field_offsets = []
        for child in children:
            self._field_offsets.append(offset)
            offset += len(child.field_names())
        self._field_offsets.append(offset)

    def field_names(self):
        """Return the children field names for this field."""
        raise NotImplementedError('Field is an abstract class.')

    def field_types(self):
        """Return the numpy.dtype for each of the children fields."""
        raise NotImplementedError('Field is an abstract class.')

    def clone(self):
        """Clone this Field along with its children."""
        raise NotImplementedError('Field is an abstract class.')

    def _set_parent(self, parent, relative_id):
        self._parent = (parent, relative_id)

    def slice(self):
        """
        Returns a slice representing the range of field ids that belong to
        this field. This slice can be used to index a list of fields.

        E.g.:

            >>> s = Struct(
            >>>     ('a', Scalar()),
            >>>     ('b', Struct(
            >>>         ('b1', Scalar()),
            >>>         ('b2', Scalar()),
            >>>     )),
            >>>     ('c', Scalar()),
            >>> )
            >>> field_data = ['da', 'db1', 'db2', 'dc']
            >>> field_data[s.b.slice()]
            ['db1', 'db2']
        """
        base_id = self._child_base_id()
        return slice(base_id, base_id + len(self.field_names()))

    def _child_base_id(self, child_index=None):
        """Get the base id of the given child"""
        p, i = self._parent
        pos = 0 if child_index is None else self._field_offsets[child_index]
        if p:
            pos += p._child_base_id(i)
        return pos

    def __eq__(self, other):
        """Equivalence of two schemas"""
        return ((self.field_names() == other.field_names()) and
                (self.field_types() == other.field_types()))


class List(Field):
    """Represents a variable-length list.

    Values of a list can also be complex fields such as Lists and Structs.
    In addition to the fields exposed by its `values` field, a List exposes
    an additional `lengths` field, which will contain the size of each list
    under the parent domain.
    """
    def __init__(self, values):
        assert isinstance(values, Field)
        self.lengths = Scalar(np.int32)
        self.values = values.clone()
        self.lengths._set_parent(self, 0)
        self.values._set_parent(self, 1)
        Field.__init__(self, [self.lengths, self.values])

    def field_names(self):
        value_fields = self.values.field_names()
        return (
            ['lengths'] +
            [_join_field_name('values', v) for v in value_fields])

    def field_types(self):
        return self.lengths.field_types() + self.values.field_types()

    def clone(self):
        return List(self.values)


class Struct(Field):
    """Represents a named list of fields sharing the same domain.
    """
    def __init__(self, *fields):
        for field in fields:
            assert len(field) == 2
            assert field[0], 'Field names cannot be empty'
            assert field[0] != 'lengths', (
                'Struct cannot contain a field named `lengths`.')
            assert isinstance(field[1], Field)
        fields = [(name, field.clone()) for name, field in fields]
        for id, (name, field) in enumerate(fields):
            field._set_parent(self, id)
        self.fields = OrderedDict(fields)
        Field.__init__(self, self.fields.values())

    def field_names(self):
        names = []
        for name, field in self.fields.items():
            names += [_join_field_name(name, f) for f in field.field_names()]
        return names

    def field_types(self):
        types = []
        for name, field in self.fields.items():
            types += field.field_types()
        return types

    def clone(self):
        return Struct(*self.fields.items())

    def __getattr__(self, item):
        return self.fields[item]


class Scalar(Field):
    """Represents a typed scalar or tensor of fixed shape.

    A Scalar is a leaf in a schema tree, translating to exactly one tensor in
    the dataset's underlying storage.

    Usually, the tensor storing the actual values of this field is a 1D
    tensor, representing a series of values in its domain. It is possible
    however to have higher rank values stored as a Scalar, as long as all
    entries have the same shape.

    E.g.:

        Scalar(np.float64)

            Scalar field of type float64. Caffe2 will expect readers and
            datasets to expose it as a 1D tensor of doubles (vector), where
            the size of the vector is determined by this field's domain.

        Scalar((np.int32, 5))

            Tensor field of type int32. Caffe2 will expect readers and
            datasets to implement it as a 2D tensor (matrix) of shape (L, 5),
            where L is determined by this field's domain.

        Scalar((str, (10, 20)))

            Tensor field of type str. Caffe2 will expect readers and
            datasets to implement it as a 3D tensor of shape (L, 10, 20),
            where L is determined by this field's domain.

    If the field type is unknown at construction time, call Scalar(), which
    will default to np.void as its dtype.

    It is an error to pass a structured dtype to Scalar, since it would
    contain more than one field. Instead, use from_dtype, which will
    construct a nested `Struct` field reflecting the given dtype's structure.
    """
    def __init__(self, dtype=None):
        self._original_dtype = dtype
        self.dtype = np.dtype(dtype or np.void)
        assert not self.dtype.fields, (
            'Cannot create Scalar with a structured dtype. ' +
            'Use from_dtype instead.')
        Field.__init__(self, [])

    def field_names(self):
        return ['']

    def field_types(self):
        return [self.dtype]

    def clone(self):
        return Scalar(self._original_dtype)

    def id(self):
        """
        Return the zero-indexed position of this scalar field in its schema.
        Used in order to index into the field_blob list returned by readers
        or accepted by writers.
        """
        return self._child_base_id()


def Map(keys, values, keys_name='keys', values_name='values'):
    """A map is a List of Struct containing keys and values fields.
    Optionally, you can provide custom names for the keys and values fields.
    """
    return List(Struct((keys_name, keys), (values_name, values)))


def from_dtype(dtype, _outer_shape=()):
    """Constructs a Caffe2 schema from the given numpy's dtype.

    Numpy supports scalar, array-like and structured datatypes, as long as
    all the shapes are fixed. This function breaks down the given dtype into
    a Caffe2 schema containing `Struct` and `Scalar` types.

    Fields containing byte offsets are not currently supported.
    """
    if not isinstance(dtype, np.dtype):
        # wrap into an np.dtype
        shape = _outer_shape
        dtype = np.dtype((dtype, _outer_shape))
    else:
        # concatenate shapes if necessary
        shape = _outer_shape + dtype.shape
        if shape != dtype.shape:
            dtype = np.dtype((dtype.base, shape))

    if not dtype.fields:
        return Scalar(dtype)

    struct_fields = []
    for name, (fdtype, offset) in dtype.fields.items():
        assert offset == 0, ('Fields with byte offsets are not supported.')
        struct_fields.append((name, from_dtype(fdtype, _outer_shape=shape)))
    return Struct(*struct_fields)


class _SchemaNode(object):
    """This is a private class used to represent a Schema Node"""
    def __init__(self, name, type_str=''):
        self.name = name
        self.children = []
        self.type_str = type_str
        self.field = None

    def add_child(self, name, type_str=''):
        for child in self.children:
            if child.name == name and child.type_str == type_str:
                return child
        child = _SchemaNode(name, type_str)
        self.children.append(child)
        return child

    def get_field(self):

        list_names = ['lengths', 'values']
        map_names = ['lengths', 'keys', 'values']

        if len(self.children) == 0 or self.field is not None:
            assert self.field is not None
            return self.field

        child_names = []
        for child in self.children:
            child_names.append(child.name)

        if (set(child_names) == set(list_names)):
            for child in self.children:
                if child.name == 'values':
                    self.field = List(child.get_field())
                    self.type_str = "List"
                    return self.field

        elif (set(child_names) == set(map_names)):
            for child in self.children:
                if child.name == 'keys':
                    key_field = child.get_field()
                elif child.name == 'values':
                    values_field = child.get_field()
            self.field = Map(key_field, values_field)
            self.type_str = "Map"
            return self.field

        else:
            struct_fields = []
            for child in self.children:
                if child.field is not None:
                    struct_fields.append((child.name, child.field))
                else:
                    struct_fields.append((child.name, child.get_field()))

            self.field = Struct(*struct_fields)
            self.type_str = "Struct"
            return self.field

    def print_recursively(self):
        for child in self.children:
            child.print_recursively()
        logger.info("Printing node: Name and type")
        logger.info(self.name)
        logger.info(self.type_str)


def from_column_list(column_names, column_types):

    root = _SchemaNode('root', 'Struct')
    for column_name, column_type in zip(column_names, column_types):
        columns = column_name.split(':')
        current = root
        for i in range(len(columns)):
            name = columns[i]
            type_str = ''
            field = None
            if i == len(columns) - 1:
                type_str = column_type
                field = Scalar(column_type)
            next = current.add_child(name, type_str)
            if field is not None:
                next.field = field
            current = next

    return root.get_field()
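A small sketch of how `from_dtype` mirrors a structured NumPy dtype into the
types defined above (the field names and values are chosen purely for
illustration):

    import numpy as np

    dt = np.dtype([('x', np.float32), ('y', (np.int64, 2))])
    s = from_dtype(dt)
    # Equivalent to Struct(('x', Scalar(np.float32)),
    #                      ('y', Scalar((np.int64, (2,)))))
    assert s.field_names() == ['x', 'y']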
caffe2/python/sparse_to_dense_mask_test.py (new file)
@@ -0,0 +1,82 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase

import numpy as np


class TestSparseToDenseMask(TestCase):

    def test_sparse_to_dense_mask_float(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default', 'lengths'],
            ['output'],
            mask=[999999999, 2, 6])
        workspace.FeedBlob(
            'indices',
            np.array([2, 4, 6, 1, 2, 999999999, 2], dtype=np.int32))
        workspace.FeedBlob(
            'values',
            np.array([1, 2, 3, 4, 5, 6, 7], dtype=np.float))
        workspace.FeedBlob('default', np.array(-1, dtype=np.float))
        workspace.FeedBlob('lengths', np.array([3, 4], dtype=np.int32))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([[-1, 1, 3], [6, 7, -1]], dtype=np.float)
        self.assertEqual(output.shape, expected.shape)
        self.assertFalse(np.any(output - expected))

    def test_sparse_to_dense_mask_string(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default', 'lengths'],
            ['output'],
            mask=[999999999, 2, 6])
        workspace.FeedBlob(
            'indices',
            np.array([2, 4, 6, 1, 2, 999999999, 2], dtype=np.int32))
        workspace.FeedBlob(
            'values',
            np.array(['1', '2', '3', '4', '5', '6', '7'], dtype=np.str))
        workspace.FeedBlob('default', np.array('-1', dtype=np.str))
        workspace.FeedBlob('lengths', np.array([3, 4], dtype=np.int32))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([['-1', '1', '3'], ['6', '7', '-1']], dtype=np.str)
        self.assertEqual(output.shape, expected.shape)
        self.assertTrue(np.all(np.equal(output, expected)))

    def test_sparse_to_dense_mask_empty_lengths(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default', 'lengths'],
            ['output'],
            mask=[1, 2, 6])
        workspace.FeedBlob('indices', np.array([2, 4, 6], dtype=np.int32))
        workspace.FeedBlob('values', np.array([1, 2, 3], dtype=np.float))
        workspace.FeedBlob('default', np.array(-1, dtype=np.float))
        workspace.FeedBlob('lengths', np.array([], dtype=np.int32))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([-1, 1, 3], dtype=np.float)
        self.assertEqual(output.shape, expected.shape)
        self.assertFalse(np.any(output - expected))

    def test_sparse_to_dense_mask_no_lengths(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default'],
            ['output'],
            mask=[1, 2, 6])
        workspace.FeedBlob('indices', np.array([2, 4, 6], dtype=np.int32))
        workspace.FeedBlob('values', np.array([1, 2, 3], dtype=np.float))
        workspace.FeedBlob('default', np.array(-1, dtype=np.float))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([-1, 1, 3], dtype=np.float)
        self.assertEqual(output.shape, expected.shape)
        self.assertFalse(np.any(output - expected))
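To see how the first expected output comes about: each row gathers, for
every position in `mask`, the value whose sparse index matches, falling back
to `default`. Restated with plain NumPy and a dict (an illustration, not the
operator's implementation):

    import numpy as np

    mask = [999999999, 2, 6]
    # row 0 consumes the first lengths[0] == 3 (index, value) pairs
    lookup = dict(zip([2, 4, 6], [1, 2, 3]))
    row0 = np.array([lookup.get(m, -1) for m in mask], dtype=np.float64)
    assert (row0 == np.array([-1., 1., 3.])).all()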
@ -6,10 +6,18 @@ import shutil
|
||||||
import socket
|
import socket
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from caffe2.proto import caffe2_pb2
|
from caffe2.proto import caffe2_pb2
|
||||||
from caffe2.python import scope, utils
|
from caffe2.python import scope, utils
|
||||||
from ._import_c_extension import * # noqa
|
from ._import_c_extension import * # noqa
|
||||||
|
|
||||||
|
# Python 2 and 3 compatibility: test if basestring exists
|
||||||
|
try:
|
||||||
|
basestring # NOQA
|
||||||
|
except NameError:
|
||||||
|
# This is python3 so we define basestring.
|
||||||
|
basestring = str
|
||||||
|
|
||||||
|
|
||||||
def _GetFreeFlaskPort():
|
def _GetFreeFlaskPort():
|
||||||
"""Get a free flask port."""
|
"""Get a free flask port."""
|
||||||
|
|
@ -86,7 +94,9 @@ def ResetWorkspace(root_folder=None):
|
||||||
return cc_ResetWorkspace(root_folder)
|
return cc_ResetWorkspace(root_folder)
|
||||||
|
|
||||||
|
|
||||||
def CreateNet(net, input_blobs=[]):
|
def CreateNet(net, input_blobs=None):
|
||||||
|
if input_blobs is None:
|
||||||
|
input_blobs = []
|
||||||
for input_blob in input_blobs:
|
for input_blob in input_blobs:
|
||||||
CreateBlob(input_blob)
|
CreateBlob(input_blob)
|
||||||
return cc_CreateNet(StringfyProto(net))
|
return cc_CreateNet(StringfyProto(net))
|
||||||
|
|
@ -112,6 +122,14 @@ def RunPlan(plan):
|
||||||
return cc_RunPlan(StringfyProto(plan))
|
return cc_RunPlan(StringfyProto(plan))
|
||||||
|
|
||||||
|
|
||||||
|
def _StringifyBlobName(name):
|
||||||
|
if isinstance(name, basestring):
|
||||||
|
return name
|
||||||
|
assert type(name).__name__ == 'BlobReference', \
|
||||||
|
"Expected a string or BlobReference"
|
||||||
|
return str(name)
|
||||||
|
|
||||||
|
|
||||||
def FeedBlob(name, arr, device_option=None):
|
def FeedBlob(name, arr, device_option=None):
|
||||||
"""Feeds a blob into the workspace.
|
"""Feeds a blob into the workspace.
|
||||||
|
|
||||||
|
|
@@ -125,6 +143,10 @@ def FeedBlob(name, arr, device_option=None):
     """
     if type(arr) is caffe2_pb2.TensorProto:
         arr = utils.Caffe2TensorToNumpyArray(arr)
+    if type(arr) is np.ndarray and arr.dtype.kind == 'S':
+        # Plain NumPy strings are weird, let's use objects instead
+        arr = arr.astype(np.object)
+    name = _StringifyBlobName(name)
     if device_option is not None:
         return cc_FeedBlob(name, arr, StringfyProto(device_option))
     elif scope.DEVICESCOPE is not None:
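The new dtype check works around fixed-width NumPy byte strings (dtype kind 'S'): every element is padded to the array's item size with NUL bytes, so a consumer of the raw buffer cannot tell padding from real data, and trailing NULs are silently dropped on element access. Object arrays keep each element as a real Python string with its exact length. A small standalone demonstration (the exact C++-side motivation is inferred):

    import numpy as np

    a = np.array([b'ab', b'wxyz'])   # dtype '|S4': items padded to 4 bytes
    print(a.tobytes())               # b'ab\x00\x00wxyz' -- padding looks like data
    print(np.array([b'ab\x00'])[0])  # b'ab' -- a trailing NUL is lost on access
    o = a.astype(object)             # object array (np.object in the diff is the old alias)
    print([len(x) for x in o])       # [2, 4] -- exact lengths preserved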
@@ -133,6 +155,40 @@ def FeedBlob(name, arr, device_option=None):
     return cc_FeedBlob(name, arr)


+def FetchBlob(name):
+    """Fetches a blob from the workspace.
+
+    Inputs:
+        name: the name of the blob - a string or a BlobReference
+    Returns:
+        Fetched blob (numpy array or string) if successful
+    """
+    name = _StringifyBlobName(name)
+    return cc_FetchBlob(name)
+
+
+class _BlobDict(object):
+    """Provides python dict compatible way to do fetching and feeding"""
+
+    def __getitem__(self, key):
+        return FetchBlob(key)
+
+    def __setitem__(self, key, value):
+        return FeedBlob(key, value)
+
+    def __len__(self):
+        return len(Blobs())
+
+    def __iter__(self):
+        return Blobs().__iter__()
+
+    def __contains__(self, item):
+        return HasBlob(item)
+
+
+blobs = _BlobDict()
+
+
 class Model(object):
     def __init__(self, net, parameters, inputs, outputs, device_option=None):
         """Initializes a model.
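The _BlobDict wrapper adds dict-style sugar over the workspace API; each operation delegates to the functions above (FetchBlob, FeedBlob, Blobs, HasBlob). A short usage sketch:

    from caffe2.python import workspace
    import numpy as np

    workspace.blobs['x'] = np.ones((2, 3), dtype=np.float32)  # FeedBlob under the hood
    x = workspace.blobs['x']                                  # FetchBlob under the hood
    print('x' in workspace.blobs)                             # membership via HasBlob
    print(len(workspace.blobs))                               # size via Blobs()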
@@ -8,7 +8,8 @@ from caffe2.python import core, test_util, workspace
 class TestWorkspace(unittest.TestCase):
     def setUp(self):
         self.net = core.Net("test-net")
-        self.net.ConstantFill([], "testblob", shape=[1, 2, 3, 4], value=1.0)
+        self.testblob_ref = self.net.ConstantFill(
+            [], "testblob", shape=[1, 2, 3, 4], value=1.0)
         workspace.ResetWorkspace()

     def testRootFolder(self):
@@ -64,6 +65,20 @@ class TestWorkspace(unittest.TestCase):
         self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
         np.testing.assert_array_equal(fetched_again, 2.0)

+    def testFetchFeedBlobViaBlobReference(self):
+        self.assertEqual(
+            workspace.RunNetOnce(self.net.Proto().SerializeToString()), True)
+        fetched = workspace.FetchBlob(self.testblob_ref)
+        # check if fetched is correct.
+        self.assertEqual(fetched.shape, (1, 2, 3, 4))
+        np.testing.assert_array_equal(fetched, 1.0)
+        fetched[:] = 2.0
+        self.assertEqual(workspace.FeedBlob(self.testblob_ref, fetched), True)
+        fetched_again = workspace.FetchBlob("testblob")  # fetch by name now
+        self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
+        np.testing.assert_array_equal(fetched_again, 2.0)
+
     def testFetchFeedBlobTypes(self):
         for dtype in [np.float16, np.float32, np.float64, np.bool,
                       np.int8, np.int16, np.int32, np.int64,
@@ -101,7 +116,8 @@ class TestWorkspace(unittest.TestCase):
         strs = np.array([
             ' '.join(10 * ['long string']),
             ' '.join(128 * ['very long string']),
-            'small string'])
+            'small \0\1\2 string',
+            "Hello, world! I have special \0 symbols \1!"])
         workspace.FeedBlob('my_str_tensor', strs)
         strs2 = workspace.FetchBlob('my_str_tensor')
         self.assertEqual(strs.shape, strs2.shape)
@@ -117,6 +133,32 @@ class TestWorkspace(unittest.TestCase):
         for i in range(0, strs.shape[0]):
             self.assertEqual(strs[i], strs2[i])

+    def testFetchFeedPlainString(self):
+        # this is actual string, not a tensor of strings
+        s = "Hello, world! I have special \0 symbols \1!"
+        workspace.FeedBlob('my_plain_string', s)
+        s2 = workspace.FetchBlob('my_plain_string')
+        self.assertEqual(s, s2)
+
+    def testFetchFeedViaBlobDict(self):
+        self.assertEqual(
+            workspace.RunNetOnce(self.net.Proto().SerializeToString()), True)
+        fetched = workspace.blobs["testblob"]
+        # check if fetched is correct.
+        self.assertEqual(fetched.shape, (1, 2, 3, 4))
+        np.testing.assert_array_equal(fetched, 1.0)
+        fetched[:] = 2.0
+        workspace.blobs["testblob"] = fetched
+        fetched_again = workspace.blobs["testblob"]
+        self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
+        np.testing.assert_array_equal(fetched_again, 2.0)
+
+        self.assertTrue("testblob" in workspace.blobs)
+        self.assertFalse("non_existant" in workspace.blobs)
+        self.assertEqual(len(workspace.blobs), 1)
+        for key in workspace.blobs:
+            self.assertEqual(key, "testblob")
+

 class TestMultiWorkspaces(unittest.TestCase):
     def setUp(self):
@@ -14,7 +14,8 @@ void adagrad_update(
     float epsilon,
     const float* lr,
     Context* context) {
-#pragma omp parallel for
+  // TODO(cxj): use OMP when it is reliable
+  // #pragma omp parallel for
   for (auto i = 0; i < N; ++i) {
     float gi = g[i];
     float hi = nh[i] = h[i] + gi * gi;
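For reference, the loop above is the standard dense AdaGrad rule; a NumPy sketch under that assumption (the final write of the new weights is inferred from the standard rule, since the hunk ends before it; the + sign follows Caffe2's convention of folding the descent direction into lr/grad):

    import numpy as np

    def adagrad_update(w, g, h, lr, epsilon):
        nh = h + g * g                             # accumulate squared gradients
        nw = w + lr * g / (np.sqrt(nh) + epsilon)  # per-coordinate scaled step
        return nw, nh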
@@ -78,8 +79,8 @@ class SparseAdagradOp final : public Operator<Context> {
     const auto* momentIn = Input(MOMENT_1).template data<T>();
     auto* gradOut = Output(OUTPUT_GRAD)->template mutable_data<T>();
     auto* momentOut = Output(OUTPUT_MOMENT_1)->template mutable_data<T>();
-#pragma omp parallel for
+    // TODO(cxj): use OMP when it is reliable
+    // #pragma omp parallel for
     for (auto i = 0; i < n; ++i) {
       auto idx = indices[i];
       if (block_size == 1) {
@@ -41,7 +41,8 @@ void ftrl_update(
     T* new_nz,
    const FtrlParams<T>& params,
    Context* context) {
-#pragma omp parallel for
+  // TODO(cxj): use OMP when it is reliable
+  // #pragma omp parallel for
  for (auto i = 0; i < N; ++i) {
    ftrl_compute(
        w[i],
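The per-coordinate ftrl_compute call is not shown in this hunk; for orientation only, a sketch of the textbook FTRL-Proximal update (McMahan et al.), which may differ in detail from Caffe2's actual ftrl_compute:

    import math

    def ftrl_step(w, z, n, g, alpha, beta, lambda1, lambda2):
        # Textbook FTRL-Proximal, one coordinate; illustrative only.
        sigma = (math.sqrt(n + g * g) - math.sqrt(n)) / alpha
        z += g - sigma * w
        n += g * g
        if abs(z) <= lambda1:
            w = 0.0
        else:
            w = -(z - math.copysign(lambda1, z)) / ((beta + math.sqrt(n)) / alpha + lambda2)
        return w, z, n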
@@ -93,7 +94,9 @@ void SparseFtrlOp<T>::DoRun() {
   T* nz = n_z->template mutable_data<T>();
   const SIndex* idxs = indices.template data<SIndex>();
   const T* g = grad.template data<T>();
-#pragma omp parallel for
+
+  // TODO(cxj): use OMP when it is reliable
+  // #pragma omp parallel for
   for (TIndex i = 0; i < K; ++i) {
     SIndex idx = idxs[i];
     DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx
@@ -11,9 +11,11 @@
 // platforms, it allows one to quickly port Caffe2 to different platforms
 // where BLAS may not be present.

-#include <random>
 #include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
 #include <atomic>
+#include <random>

 #ifdef CAFFE2_USE_MKL
 #include <mkl.h>
@@ -486,6 +488,7 @@ void Set<T, CPUContext>(const int N, const T alpha, T *Y, \
 CAFFE2_SPECIALIZED_SET(float);
 CAFFE2_SPECIALIZED_SET(double);
 CAFFE2_SPECIALIZED_SET(int);
+CAFFE2_SPECIALIZED_SET(bool);
 #undef CAFFE2_SPECIALIZED_SET

 #define CAFFE2_INSTANTIATE_BINARY_OP(name, op, T) \
caffe2/utils/string_utils.cc (new file, 14 lines)
@@ -0,0 +1,14 @@
+#include "caffe2/utils/string_utils.h"
+
+namespace caffe2 {
+
+std::vector<std::string> split(char separator, const std::string& string) {
+  std::vector<std::string> pieces;
+  std::stringstream ss(string);
+  std::string item;
+  while (getline(ss, item, separator)) {
+    pieces.push_back(std::move(item));
+  }
+  return pieces;
+}
+}
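One behavioral note on the getline-based split above: unlike Python's str.split, it produces no trailing empty piece when the input ends with the separator, and an empty input yields an empty vector. The equivalent behavior, sketched in Python for comparison:

    def cpp_like_split(separator, s):
        # Mirrors the std::getline loop: a trailing separator adds no empty piece.
        pieces = s.split(separator)
        if pieces and pieces[-1] == '':
            pieces.pop()
        return pieces

    print(cpp_like_split(',', 'a,b,'))  # ['a', 'b']  (str.split gives ['a', 'b', ''])
    print(cpp_like_split(',', ''))      # []          (str.split gives [''])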
caffe2/utils/string_utils.h (new file, 10 lines)
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace caffe2 {
+
+std::vector<std::string> split(char separator, const std::string& string);
+}