mirror of https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00

commit bcea409c82 ("sync"), parent f01f2063dd
@@ -11,12 +11,70 @@ CAFFE2_DEFINE_int(
    "Chunk size to split tensor data into");

namespace caffe2 {
namespace {
/**
 * @brief StringSerializer is the serializer for String.
 *
 * StringSerializer takes in a blob that contains a String, and serializes it
 * into a BlobProto protocol buffer.
 */
class StringSerializer : public BlobSerializerBase {
 public:
  StringSerializer() {}
  ~StringSerializer() {}
  /**
   * Serializes a Blob. Note that this blob has to contain std::string,
   * otherwise this function produces a fatal error.
   */
  void Serialize(
      const Blob& blob,
      const string& name,
      SerializationAcceptor acceptor) override {
    CHECK(blob.IsType<std::string>());

    BlobProto blob_proto;
    blob_proto.set_name(name);
    blob_proto.set_type("std::string");
    blob_proto.set_content(blob.template Get<std::string>());
    acceptor(name, blob_proto.SerializeAsString());
  }
};

/**
 * @brief StringDeserializer is the deserializer for Strings.
 *
 */
class StringDeserializer : public BlobDeserializerBase {
 public:
  bool Deserialize(const BlobProto& proto, Blob* blob) override {
    *blob->GetMutable<std::string>() = proto.content();
    return true;
  }
};
}

namespace {

// We can't use DeviceType_Name because of a protobuf-lite constraint.
std::string tensorDeviceTypeName(const DeviceType& d) {
  switch (d) {
    case CPU:
      return "TensorCPU";
    case CUDA:
      return "TensorCUDA";
    default:
      CAFFE_THROW("Unknown device: ", d);
      return "";
  }
};
}

// The blob serialization member function implementation.
void Blob::Serialize(
    const string& name,
    BlobSerializerBase::SerializationAcceptor acceptor) const {
  std::unique_ptr<BlobSerializerBase> serializer(CreateSerializer(meta_.id()));
  CAFFE_ENFORCE(serializer, "No known serializer for ", meta_.name());
  serializer->Serialize(*this, name, acceptor);
}
@@ -33,7 +91,6 @@ std::string Blob::Serialize(const string& name) const {
  return data.str();
}

// Specialization for StoreDeviceDetail for CPU - nothing needs to be done.
template <>
void TensorSerializer<CPUContext>::StoreDeviceDetail(
@@ -60,9 +117,8 @@ bool Blob::Deserialize(const BlobProto& blob_proto) {
  if (blob_proto.has_tensor()) {
    // This is a tensor object. Depending on the device type, we will
    // use the corresponding TensorDeserializer.
-    auto deserializer = CreateDeserializer(
-        "Tensor" +
-        DeviceType_Name(blob_proto.tensor().device_detail().device_type()));
+    auto deserializer = CreateDeserializer(tensorDeviceTypeName(
+        blob_proto.tensor().device_detail().device_type()));
    // Tensor's deserializer should always be registered, but we will double
    // check if it is not null anyway.
    return CHECK_NOTNULL(deserializer.get())->Deserialize(blob_proto, this);
@@ -82,5 +138,8 @@ REGISTER_BLOB_SERIALIZER(
    (TypeMeta::Id<TensorCPU>()),
    TensorSerializer<CPUContext>);
REGISTER_BLOB_DESERIALIZER(TensorCPU, TensorDeserializer<CPUContext>);
// Serialize std::string
REGISTER_BLOB_SERIALIZER((TypeMeta::Id<std::string>()), StringSerializer);
REGISTER_BLOB_DESERIALIZER(std::string, StringDeserializer);
} // namespace
} // namespace caffe2
@@ -69,6 +69,20 @@ TEST(BlobTest, BlobWrongType) {
  ASSERT_THROW(blob.Get<int>(), EnforceNotMet);
}

TEST(BlobTest, StringSerialization) {
  const std::string kTestString = "Hello world?";
  Blob blob;
  *blob.GetMutable<std::string>() = kTestString;

  string serialized = blob.Serialize("test");
  BlobProto proto;
  CHECK(proto.ParseFromString(serialized));
  EXPECT_EQ(proto.name(), "test");
  EXPECT_EQ(proto.type(), "std::string");
  EXPECT_FALSE(proto.has_tensor());
  EXPECT_EQ(proto.content(), kTestString);
}

TEST(TensorNonTypedTest, TensorChangeType) {
  vector<int> dims(3);
  dims[0] = 2;
@@ -5,8 +5,9 @@
#include <cstdlib>
#include <random>

-#include "caffe2/proto/caffe2.pb.h"
+#include "caffe2/core/logging.h"
#include "caffe2/core/typeid.h"
+#include "caffe2/proto/caffe2.pb.h"
#include "caffe2/utils/math.h"

namespace caffe2 {
@@ -103,6 +104,7 @@ class CPUContext final {
  // Two copy functions that deal with cross-device copies.
  template <class SrcContext, class DstContext>
  inline void CopyBytes(size_t nbytes, const void* src, void* dst);

  template <typename T, class SrcContext, class DstContext>
  inline void Copy(size_t n, const T* src, T* dst) {
    if (std::is_fundamental<T>::value) {
@@ -116,6 +118,16 @@ class CPUContext final {
    }
  }

  template <class SrcContext, class DstContext>
  inline void
  CopyItems(const TypeMeta& meta, size_t n, const void* src, void* dst) {
    if (meta.copy()) {
      meta.copy()(src, dst, n);
    } else {
      CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
    }
  }
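  // Editorial sketch, not part of this commit: CopyItems is the type-aware
  // counterpart of CopyBytes. A hypothetical caller (assuming TypeMeta::Make
  // from typeid.h):
  //
  //   CPUContext ctx;
  //   std::string src[2] = {"a", "b"}, dst[2];
  //   ctx.CopyItems<CPUContext, CPUContext>(
  //       TypeMeta::Make<std::string>(), 2, src, dst);
  //
  // For std::string the registered meta.copy() function copies element-wise;
  // for POD types meta.copy() is null and the call falls back to CopyBytes.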

 protected:
  // TODO(jiayq): instead of hard-coding a generator, make it more flexible.
  int random_seed_{1701};
@@ -7,6 +7,16 @@ thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_;

namespace {
bool Caffe2UsePinnedCPUAllocator(int*, char***) {
#ifdef __SANITIZE_ADDRESS__
  // Note(jiayq): for more details, see
  //     https://github.com/google/sanitizers/issues/629
  LOG(WARNING) << "There are known issues between address sanitizer and "
                  "cudaMallocHost. As a result, caffe2 will not enable pinned "
                  "memory allocation in asan mode. If you are expecting any "
                  "behavior that depends on asan, be advised that it is not "
                  "turned on.";
  return true;
#else
  if (!HasCudaGPU()) {
    VLOG(1) << "No GPU present. I won't use pinned allocator then.";
    return true;
@@ -14,6 +24,7 @@ bool Caffe2UsePinnedCPUAllocator(int*, char***) {
  VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator.";
  SetCPUAllocator(new PinnedCPUAllocator());
  return true;
#endif
}

REGISTER_CAFFE2_INIT_FUNCTION(Caffe2UsePinnedCPUAllocator,
@@ -116,7 +116,9 @@ CAFFE_DECLARE_REGISTRY(Caffe2DBRegistry, DB, const string&, Mode);
 */
inline unique_ptr<DB> CreateDB(
    const string& db_type, const string& source, Mode mode) {
-  return Caffe2DBRegistry()->Create(db_type, source, mode);
+  auto result = Caffe2DBRegistry()->Create(db_type, source, mode);
+  VLOG(1) << ((!result) ? "not found db " : "found db ") << db_type;
+  return result;
}

/**
@@ -68,13 +68,13 @@ class InitRegisterer {

#define REGISTER_CAFFE2_INIT_FUNCTION(name, function, description)  \
  namespace {                                                       \
-  ::caffe2::InitRegisterer g_caffe2_initregisterer_name(           \
+  ::caffe2::InitRegisterer g_caffe2_initregisterer_##name(         \
      function, false, description);                                \
  } // namespace

#define REGISTER_CAFFE2_EARLY_INIT_FUNCTION(name, function, description) \
  namespace {                                                            \
-  ::caffe2::InitRegisterer g_caffe2_initregisterer_name(                 \
+  ::caffe2::InitRegisterer g_caffe2_initregisterer_##name(               \
      function, true, description);                                      \
  } // namespace
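// Editorial note, not part of this commit: the fix above matters because all
// anonymous-namespace blocks in one translation unit are the same namespace.
// Without the ## paste, every expansion declared the identical variable
// g_caffe2_initregisterer_name, so registering two init functions in one file
// failed to compile; g_caffe2_initregisterer_##name yields a distinct
// registerer object per registration.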
@@ -1,14 +1,70 @@
#include "caffe2/core/net.h"

#include "caffe2/core/context_gpu.h"
#include "caffe2/core/flags.h"

#include "caffe2/core/operator.h"
#include "caffe2/core/timer.h"
#include "caffe2/proto/caffe2.pb.h"

#ifdef CAFFE2_USE_NVTX
#include <nvToolsExt.h>
#endif

CAFFE2_DEFINE_bool(caffe2_use_nvtx, false, "Use NVTX ranges for profiling");

namespace caffe2 {

namespace {

using Color = int32_t;
constexpr Color kRunColor = 0x0000CCFF; // blue
constexpr Color kRecordColor = 0x00FF3300; // red
constexpr Color kWaitColor = 0x0066FF33; // green

#ifdef CAFFE2_USE_NVTX

class ProfiledRange {
 public:
  ProfiledRange(const OperatorDef& def, Color color) {
    if (!FLAGS_caffe2_use_nvtx) {
      return;
    }
    nvtxEventAttributes_t eventAttrib = {0};
    eventAttrib.version = NVTX_VERSION;
    eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    eventAttrib.colorType = NVTX_COLOR_ARGB;
    eventAttrib.color = color;
    eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
    eventAttrib.message.ascii = def.type().c_str();
    range_ = nvtxRangeStartEx(&eventAttrib);
    CHECK(range_);
  }

  ~ProfiledRange() {
    if (!FLAGS_caffe2_use_nvtx) {
      return;
    }
    nvtxRangeEnd(range_);
  }

 private:
  nvtxRangeId_t range_ = 0;
  DISABLE_COPY_AND_ASSIGN(ProfiledRange);
};

#else

class ProfiledRange {
 public:
  ProfiledRange(const OperatorDef& def, Color color) {}

 private:
  DISABLE_COPY_AND_ASSIGN(ProfiledRange);
};

#endif // ifdef CAFFE2_USE_NVTX
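// Editorial note, not part of this commit: ProfiledRange is an RAII guard.
// Constructing it opens an NVTX range colored per phase (kRunColor for
// operator runs, kRecordColor for event records, kWaitColor for stream
// waits) and the destructor closes it, so each operator shows up as a
// colored span on the profiler timeline; with CAFFE2_USE_NVTX off it
// compiles to a no-op.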

struct Stream;

struct Event {
@@ -69,6 +125,7 @@ struct Stream {

  int gpu_id_{-1};
  cudaStream_t stream_{nullptr};

 private:
  DISABLE_COPY_AND_ASSIGN(Stream);
};
@@ -128,18 +185,24 @@ class AsyncDAGNet : public DAGNetBase {
    }));

    for (auto source_parent_idx : operator_nodes_[source_idx].parents_) {
+      ProfiledRange r(
+          operator_nodes_[source_parent_idx].operator_->def(), kWaitColor);
      stream.wait(events_[source_parent_idx].get());
    }

    // We've waited on all our parent indices.
    bool success = true;
-    for (auto idx: chain) {
+    for (auto idx : chain) {
+      ProfiledRange r(operator_nodes_[idx].operator_->def(), kRunColor);
      success &= operator_nodes_[idx].operator_->RunAsync();
    }

    // Record an event for the sink of the chain.
    const auto& sink_idx = chain.back();
+    {
+      ProfiledRange r(operator_nodes_[sink_idx].operator_->def(), kRecordColor);
      events_[sink_idx]->record(stream);
+    }
    CHECK(!eventRecorded_[sink_idx]);
    eventRecorded_[sink_idx] = 1;
    return success;
@@ -157,9 +220,11 @@ class AsyncDAGNet : public DAGNetBase {
    Stream stream{device_option};

    // Potential optimization: we can pre-compute outstanding events.
-    for (auto& event : events_) {
+    for (auto i = 0; i < events_.size(); ++i) {
+      auto& event = events_[i];
      if (event->outstanding_) {
        VLOG(2) << "Synchronizing host on outstanding event";
+        ProfiledRange r(operator_nodes_[i].operator_->def(), kWaitColor);
        stream.wait(event.get());
      }
    }
@@ -2,17 +2,6 @@
#include "caffe2/core/flags.h"

CAFFE2_DEFINE_bool(
-    caffe2_keep_on_shrink, false,
+    caffe2_keep_on_shrink,
+    true,
    "If set, keeps memory when a tensor is shrinking its size.");
-
-namespace caffe2 {
-
-namespace detail {
-
-vector<TIndex>& shape(size_t n) {
-  static thread_local vector<TIndex> r;
-  r.resize(n);
-  return r;
-}
-}
-}
@@ -150,6 +150,40 @@ class Tensor {

  virtual ~Tensor() {}

  /**
   * @brief Extends the outer-most dimension of this tensor by num elements,
   * preserving the existing data.
   *
   * The underlying data may be reallocated in order to accommodate the new
   * elements, in which case this tensor's capacity is grown at a factor of
   * growthPct. This ensures that Extend runs in amortized O(1) time
   * complexity.
   */
  template <class ContextForCopy>
  void Extend(TIndex num, int growthPct, ContextForCopy* context) {
    CHECK_GE(dims_.size(), 1);
    auto oldSize = size_;
    auto newDims = dims_;
    newDims[0] += num;
    if (!data_) {
      Resize(newDims);
      return;
    }
    auto newSize = std::accumulate(
        newDims.begin(), newDims.end(), 1, std::multiplies<TIndex>());
    if (newSize * meta_.itemsize() > capacity_) {
      auto newCapacity = dims_;
      newCapacity[0] = std::max(newDims[0], dims_[0] * (growthPct + 100) / 100);
      auto oldData = std::move(data_);
      Resize(newCapacity);
      auto* newData = raw_mutable_data(meta_);
      context->template CopyItems<ContextForCopy, ContextForCopy>(
          meta_, oldSize, oldData.get(), newData);
    }
    dims_ = newDims;
    size_ = newSize;
  }
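  // Editorial note, not part of this commit: the amortized O(1) claim above
  // follows from the geometric over-allocation. With growthPct = 50, for
  // example, Extend(1, 50, &context) called n times reallocates only when
  // newSize exceeds capacity_, and each reallocation grows capacity_ by
  // 1.5x, so the total bytes copied across n appends form a geometric
  // series bounded by a constant times the final size.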

  /**
   * @brief Resizes a tensor.
   *
@@ -297,9 +331,12 @@ class Tensor {
    CHECK(data_.get() || size_ == 0)
        << "The tensor is uninitialized. You probably need to call "
        << "Resize() and mutable_data() first.";
-    CHECK(IsType<T>())
-        << "Tensor type mistmatch, caller expects elements to be "
-        << TypeMeta::Name<T>() << " while tensor contains " << meta_.name();
+    CAFFE_ENFORCE(
+        IsType<T>(),
+        "Tensor type mistmatch, caller expects elements to be ",
+        TypeMeta::Name<T>(),
+        " while tensor contains ",
+        meta_.name());
    return static_cast<T*>(data_.get());
  }
@@ -12,34 +12,39 @@
namespace caffe2 {

namespace {
-// Returns a function that returns `true` if we should continue
-// iterating, given the current iteration count.
-std::function<bool(int)> getContinuationTest(
-    Workspace* ws,
-    const ExecutionStep& step) {
-  if (step.has_criteria_network()) {
-    CHECK(!step.has_num_iter())
-        << "Must not specify num_iter if critera_network is set";
-  }
-
-  if (!step.has_criteria_network()) {
-    int iterations = step.has_num_iter() ? step.num_iter() : 1;
-    VLOG(1) << "Executing step for " << iterations << " iterations.";
-    return [=](int i) { return i < iterations; };
-  }
-  auto* criteria_network = ws->GetNet(step.criteria_network());
-  CHECK_NOTNULL(criteria_network);
-  CHECK_EQ(criteria_network->external_output().size(), 1);
-  const auto& criteria_output = criteria_network->external_output().front();
-  VLOG(1) << "Executing step controlled by criteria output: "
-          << criteria_output;
-  return [=](int) {
-    criteria_network->Run();
-    const auto& blob = ws->GetBlob(criteria_output)->Get<TensorCPU>();
-    CHECK_EQ(blob.size(), 1);
-    CHECK(blob.IsType<bool>());
-    return blob.template data<bool>()[0] > 0;
-  };
-};
+// try to get the should_stop signal, a scalar bool blob value.
+// if the blob doesn't exist or is not initiaized, return false
+const bool getShouldStop(const Blob* b) {
+  if (!b || !b->meta().id()) { // not exist or uninitialized
+    return false;
+  }
+
+  const auto& t = b->Get<TensorCPU>();
+  CAFFE_ENFORCE(t.IsType<bool>() && t.size() == 1, "expects a scalar boolean");
+  return *(t.template data<bool>());
+}
+
+// Returns a function that returns `true` if we should continue
+// iterating, given the current iteration count.
+std::function<bool(int64_t)> getContinuationTest(
+    Workspace* ws,
+    const ExecutionStep& step) {
+  if (step.has_should_stop_blob()) {
+    CAFFE_ENFORCE(
+        !step.has_num_iter(),
+        "Must not specify num_iter if should_stop_blob is set");
+  }
+
+  if (!step.has_should_stop_blob()) {
+    int64_t iterations = step.has_num_iter() ? step.num_iter() : 1;
+    VLOG(1) << "Will execute step " << step.name() << " for " << iterations
+            << " iterations.";
+    return [=](int64_t i) { return i < iterations; };
+  } else {
+    VLOG(1) << "Will execute step " << step.name() << " until stopped by blob "
+            << step.should_stop_blob();
+    return [](int64_t i) { return true; };
+  }
+}
} // namespace
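// Editorial summary, not part of this commit: the two continuation modes are
//   - num_iter set:         netShouldContinue(i) == (i < num_iter);
//   - should_stop_blob set: netShouldContinue(i) is always true, and the
//     executor instead polls the scalar bool blob via getShouldStop() after
//     each substep or network run (see CHECK_SHOULD_STOP below).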
@@ -229,10 +234,17 @@ struct Reporter {

  }

+#define CHECK_SHOULD_STOP(shouldStop)                    \
+  if (getShouldStop(shouldStop)) {                       \
+    VLOG(1) << "Execution stopped by should_stop_blob";  \
+    return true;                                         \
+  }
+
bool Workspace::ExecuteStepRecursive(
    const ExecutionStep& step,
    ShouldContinue externalShouldContinue) {
-  LOG(INFO) << "Running execution step " << step.name();
+  VLOG(1) << "Running execution step " << step.name();

  if (!(step.substep_size() == 0 || step.network_size() == 0)) {
    LOG(ERROR) << "An ExecutionStep should either have substep or networks "
               << "but not both.";
@@ -247,22 +259,40 @@ bool Workspace::ExecuteStepRecursive(
    if (net_map_.count(step.report_net()) == 0) {
      LOG(ERROR) << "Report net " << step.report_net() << " not found.";
    }
    VLOG(1) << "Starting reporter net";
    reporter.start(net_map_[step.report_net()].get(), step.report_interval());
  }

+  const Blob* shouldStop = nullptr;
+  if (step.has_should_stop_blob()) {
+    shouldStop = GetBlob(step.should_stop_blob());
+    CAFFE_ENFORCE(
+        shouldStop, "blob ", step.should_stop_blob(), " does not exist");
+  }
+
  const auto netShouldContinue = getContinuationTest(this, step);
-  const auto shouldContinue = [&](int iter) {
+  const auto shouldContinue = [&](int64_t iter) {
    return externalShouldContinue(iter) && netShouldContinue(iter);
  };
  if (step.substep_size()) {
-    for (int iter = 0; shouldContinue(iter); ++iter) {
-      // we assume that, if we have substeps, each substep is going to take a
-      // reasonable amount of time, so logging here is fine
-      LOG(INFO) << "Execution step " << step.name()
-                << ": Starting iteration " << iter;
+    for (int64_t iter = 0; shouldContinue(iter); ++iter) {
+      VLOG(1) << "Execution step " << step.name() << ": iteration " << iter;

      if (!step.concurrent_substeps() || step.substep().size() <= 1) {
+        auto substepShouldContinue = [&, externalShouldContinue](int64_t iter) {
+          return externalShouldContinue(iter);
+        };
+
        for (auto& ss : step.substep()) {
          if (!ExecuteStepRecursive(ss, substepShouldContinue)) {
            return false;
          }
+          CHECK_SHOULD_STOP(shouldStop);
        }
      } else {
        std::atomic<int> next_substep{0};
        std::atomic<bool> got_failure{false};
-        auto substepShouldContinue = [&, externalShouldContinue](int iter) {
+        auto substepShouldContinue = [&, externalShouldContinue](int64_t iter) {
          return !got_failure && externalShouldContinue(iter);
        };
        auto worker = [&]() {
@@ -271,26 +301,26 @@ bool Workspace::ExecuteStepRecursive(
            if (got_failure || (substep_id >= step.substep().size())) {
              break;
            }
-            if (!ExecuteStepRecursive(step.substep().Get(substep_id),
-                                      substepShouldContinue)) {
+            if (!ExecuteStepRecursive(
+                    step.substep().Get(substep_id), substepShouldContinue)) {
              got_failure = true;
            }
          }
        };
        if (!step.concurrent_substeps() || step.substep().size() <= 1) {
          worker();
        } else {

          std::vector<std::thread> threads;
-          for (int i = 0; i < step.substep().size(); ++i) {
+          for (int64_t i = 0; i < step.substep().size(); ++i) {
            threads.emplace_back(worker);
          }
          for (auto& thread: threads) {
            thread.join();
          }
        }
        if (got_failure) {
          return false;
        }
+        // concurrent substeps should be careful about setting should_stop_blob
+        CHECK_SHOULD_STOP(shouldStop);
      }
    }
    return true;
  } else {
@@ -305,16 +335,19 @@ bool Workspace::ExecuteStepRecursive(
      VLOG(1) << "Going to execute network " << network_name;
      networks.push_back(net_map_[network_name].get());
    }
-    for (int iter = 0; shouldContinue(iter); ++iter) {
+    for (int64_t iter = 0; shouldContinue(iter); ++iter) {
      VLOG(1) << "Executing network iteration " << iter;
      for (NetBase* network : networks) {
        if (!network->Run()) {
          return false;
        }
+        CHECK_SHOULD_STOP(shouldStop);
      }
    }
  }
  return true;
}

+#undef CHECK_SHOULD_STOP
+
} // namespace caffe2

caffe2/operators/atomic_ops.cc (new file, 73 lines)

@@ -0,0 +1,73 @@
#include <mutex>
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

namespace caffe2 {
namespace fb {
namespace {

class CreateMutexOp final : public Operator<CPUContext> {
 public:
  CreateMutexOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<CPUContext>(operator_def, ws) {}

  bool RunOnDevice() override {
    *OperatorBase::Output<std::unique_ptr<std::mutex>>(0) =
        std::unique_ptr<std::mutex>(new std::mutex);
    return true;
  }
};

class AtomicFetchAddOp final : public Operator<CPUContext> {
 public:
  AtomicFetchAddOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<CPUContext>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0);
    auto& a = Input(1);
    auto& b = Input(2);
    auto* c = Output(0);
    auto* d = Output(1);
    c->Resize(std::vector<TIndex>());
    d->Resize(std::vector<TIndex>());
    auto* aPtr = a.data<int32_t>();
    auto* bPtr = b.data<int32_t>();
    auto* cPtr = c->mutable_data<int32_t>();
    auto* dPtr = d->mutable_data<int32_t>();
    std::lock_guard<std::mutex> lg(*mutex);
    *dPtr = *aPtr;
    *cPtr = *aPtr + *bPtr;
    return true;
  }
};

REGISTER_CPU_OPERATOR(CreateMutex, CreateMutexOp);
REGISTER_CPU_OPERATOR(AtomicFetchAdd, AtomicFetchAddOp);

OPERATOR_SCHEMA(CreateMutex)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc("Creates an unlocked mutex and returns it in a unique_ptr blob.")
    .Output(0, "mutex_ptr", "Blob containing a std::unique_ptr<mutex>.");

OPERATOR_SCHEMA(AtomicFetchAdd)
    .NumInputs(3)
    .NumOutputs(2)
    .SetDoc(R"DOC(
Given a mutex and two int32 scalar tensors, performs an atomic fetch add
by mutating the first argument and adding it to the second input
argument. Returns the updated integer and the value prior to the update.
)DOC")
    .Input(0, "mutex_ptr", "Blob containing to a unique_ptr<mutex>")
    .Input(1, "mut_value", "Value to be mutated after the sum.")
    .Input(2, "increment", "Value to add to the first operand.")
    .Output(0, "mut_value", "Mutated value after sum. Usually same as input 1.")
    .Output(1, "fetched_value", "Value of the first operand before sum.")
    .AllowInplace({{1, 0}});
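// Editorial sketch of the semantics, not part of this commit (hypothetical
// blob values): with m = CreateMutex(), v = [5], i = [2],
//   AtomicFetchAdd(m, v, i) -> mut_value == [7], fetched_value == [5],
// and with the in-place binding {{1, 0}} v itself becomes [7]; i.e. a
// fetch-and-add serialized by the mutex rather than by std::atomic.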

SHOULD_NOT_DO_GRADIENT(CreateMutex);
SHOULD_NOT_DO_GRADIENT(AtomicFetchAdd);
}
}
}
caffe2/operators/boolean_mask_ops.cc (new file, 143 lines)

@@ -0,0 +1,143 @@
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/tensor.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
|
||||
template <class Context>
|
||||
class BooleanMaskLengthsOp final : public Operator<Context> {
|
||||
public:
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
BooleanMaskLengthsOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool DoRunWithType() {
|
||||
auto& lengths = Input(0);
|
||||
auto& mask = Input(1);
|
||||
auto* lengthsOut = Output(0);
|
||||
CAFFE_ENFORCE(lengths.ndim() == 1);
|
||||
CAFFE_ENFORCE(mask.ndim() == 1);
|
||||
const auto* lengthsPtr = lengths.template data<T>();
|
||||
const auto* maskPtr = mask.template data<bool>();
|
||||
auto totalLength =
|
||||
std::accumulate(lengthsPtr, lengthsPtr + lengths.size(), 0);
|
||||
CAFFE_ENFORCE(mask.size() == totalLength);
|
||||
lengthsOut->ResizeLike(lengths);
|
||||
auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
|
||||
int p = 0;
|
||||
for (int i = 0; i < lengths.size(); ++i) {
|
||||
T lengthOut = 0;
|
||||
for (int j = 0; j < lengthsPtr[i]; ++j) {
|
||||
if (maskPtr[p++]) {
|
||||
++lengthOut;
|
||||
}
|
||||
}
|
||||
lengthsOutPtr[i] = lengthOut;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Context>
|
||||
class BooleanMaskOp final : public Operator<Context> {
|
||||
public:
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
BooleanMaskOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
auto& data = Input(0);
|
||||
auto& mask = Input(1);
|
||||
auto* dataOut = Output(0);
|
||||
CAFFE_ENFORCE(data.ndim() >= 1);
|
||||
CAFFE_ENFORCE(mask.ndim(), 1);
|
||||
CAFFE_ENFORCE(data.dims()[0] == mask.dims()[0]);
|
||||
|
||||
const auto* maskPtr = mask.template data<bool>();
|
||||
int numOutputs = 0;
|
||||
int outerSize = mask.size();
|
||||
for (int i = 0; i < outerSize; ++i) {
|
||||
if (maskPtr[i]) {
|
||||
++numOutputs;
|
||||
}
|
||||
}
|
||||
std::vector<TIndex> outShape;
|
||||
outShape.push_back(numOutputs);
|
||||
outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end());
|
||||
dataOut->Resize(outShape);
|
||||
if (numOutputs == 0) {
|
||||
return true;
|
||||
}
|
||||
auto innerSizeBytes = std::accumulate(
|
||||
data.dims().begin() + 1,
|
||||
data.dims().end(),
|
||||
1,
|
||||
std::multiplies<TIndex>()) *
|
||||
data.meta().itemsize();
|
||||
TIndex lastStart = -1;
|
||||
const auto* inPtr = (char*)data.raw_data();
|
||||
auto* outPtr = (char*)dataOut->raw_mutable_data(data.meta());
|
||||
TIndex outStart = 0;
|
||||
for (TIndex i = 0;; ++i) {
|
||||
// mask was true and either a) became false, or b) sequence finished
|
||||
if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
|
||||
const auto* src = inPtr + lastStart * innerSizeBytes;
|
||||
auto* dst = outPtr + outStart * innerSizeBytes;
|
||||
int numItems = i - lastStart;
|
||||
if (data.meta().copy()) {
|
||||
data.meta().copy()(src, dst, numItems);
|
||||
} else {
|
||||
context_.template CopyBytes<CPUContext, CPUContext>(
|
||||
numItems * data.meta().itemsize(), src, dst);
|
||||
}
|
||||
outStart += numItems;
|
||||
lastStart = -1;
|
||||
}
|
||||
if (i >= outerSize) {
|
||||
break;
|
||||
}
|
||||
// mask was false and became true
|
||||
if (lastStart == -1 && maskPtr[i]) {
|
||||
lastStart = i;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_CPU_OPERATOR(BooleanMask, BooleanMaskOp<CPUContext>);
|
||||
REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
|
||||
|
||||
OPERATOR_SCHEMA(BooleanMask)
|
||||
.NumInputs(2)
|
||||
.NumOutputs(1)
|
||||
.SetDoc(R"DOC(
|
||||
Given a data 1D tensor and a mask (boolean) tensor of same shape, returns a
|
||||
tensor containing only the elements corresponding to positions where the mask
|
||||
is true.
|
||||
)DOC")
|
||||
.Input(0, "data", "The 1D, original data tensor.")
|
||||
.Input(1, "mask", "A tensor of bools of same shape as `data`.")
|
||||
.Output(0, "masked_data", "A tensor of same type as `data`.");

OPERATOR_SCHEMA(BooleanMaskLengths)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a tensor of int32 segment lengths and a mask (boolean) tensor, return
the segment lengths of a corresponding segmented tensor after BooleanMask is
applied.
)DOC")
    .Input(0, "lengths", "A 1D int32 tensor representing segment lengths.")
    .Input(1, "mask", "A 1D bool tensor of values to keep.")
    .Output(0, "masked_lengths", "Segment lengths of a masked tensor.");

NO_GRADIENT(BooleanMask)
NO_GRADIENT(BooleanMaskLengths);
}
}
@@ -10,8 +10,14 @@ OPERATOR_SCHEMA(Concat).NumInputs(1, INT_MAX).NumOutputs(2);
// Backward compatibility names.
REGISTER_CPU_OPERATOR(DepthSplit, SplitOp<CPUContext>);
REGISTER_CPU_OPERATOR(DepthConcat, ConcatOp<CPUContext>);
-OPERATOR_SCHEMA(DepthSplit).NumInputs(1, 2).NumOutputs(1, INT_MAX);
-OPERATOR_SCHEMA(DepthConcat).NumInputs(1, INT_MAX).NumOutputs(2);
+OPERATOR_SCHEMA(DepthSplit)
+    .NumInputs(1, 2)
+    .NumOutputs(1, INT_MAX)
+    .SetDoc("Backward compatible operator name for Split.");
+OPERATOR_SCHEMA(DepthConcat)
+    .NumInputs(1, INT_MAX)
+    .NumOutputs(2)
+    .SetDoc("Backward compatible operator name for Concat.");

class GetSplitGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
@@ -85,7 +85,9 @@ class ConcatOp final : public Operator<Context> {
template <class Context>
bool SplitOp<Context>::RunOnDevice() {
  auto& input = Input(0);
+  const int input_channels = input.dim32(axis_);
  const int* axis_data;
+  vector<int> equal_split;
  if (InputSize() == 2) {
    // We obtain split from the input tensor.
    CHECK_EQ(split_.size(), 0)
@@ -94,13 +96,21 @@ bool SplitOp<Context>::RunOnDevice() {
    auto& split_tensor = OperatorBase::Input<TensorCPU>(1);
    CHECK_EQ(split_tensor.size(), OutputSize());
    axis_data = split_tensor.template data<int>();
+  } else if (split_.size() == 0) {
+    CAFFE_ENFORCE(input_channels % OutputSize() == 0,
+                  "If you did not specify split explicitly, the number of "
+                  "input channels should be divisible by the output size.");
+    equal_split.resize(OutputSize(), input_channels / OutputSize());
+    axis_data = equal_split.data();
  } else {
    // We obtain split from the parameters.
-    CHECK_EQ(split_.size(), OutputSize());
+    CAFFE_ENFORCE(split_.size() == OutputSize(),
+                  "The number of splits specified should be equal to the "
+                  "number of outputs.");
    axis_data = split_.data();
  }
  CHECK_LT(axis_, input.ndim());
-  const int input_channels = input.dim32(axis_);

  CHECK_EQ(std::accumulate(axis_data, axis_data + OutputSize(), 0),
           input_channels)
      << "Sum of split dimensions do not match: should be " << input_channels;
@@ -22,11 +22,11 @@ template <typename ArrayOfcudnnConvolutionAlgoPerf_t>
inline void LogCuDNNPerfStats(
    const ArrayOfcudnnConvolutionAlgoPerf_t& perf_stat,
    int returned_algo_count) {
-  LOG(INFO) << "Perf result: (algo: stat, time, memory)";
+  VLOG(1) << "Perf result: (algo: stat, time, memory)";
  for (int i = 0; i < returned_algo_count; ++i) {
    const auto& stat = perf_stat[i];
-    LOG(INFO) << stat.algo << ": " << stat.status
-              << " " << stat.time << " " << stat.memory;
+    VLOG(1) << stat.algo << ": " << stat.status << " " << stat.time << " "
+            << stat.memory;
  }
}
} // namespace
@@ -193,7 +193,7 @@ bool CudnnConvOp<T>::RunOnDevice() {
  if (deterministic_) {
    algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
  } else if (exhaustive_search_) {
-    LOG(INFO) << "CUDNN Convolution: doing exhaustive search.";
+    VLOG(1) << "CUDNN Convolution: doing exhaustive search.";
    // When we do an exhaustive search, we will ignore the workspace size
    // limit and simply go for the fastest algorithm. If you happen to run
    // out of memory later, you will be on your own...
@@ -229,8 +229,8 @@ bool CudnnConvOp<T>::RunOnDevice() {
        cudnn_wrapper_.inline_cudnn_handle(),
        bottom_desc_, filter_desc_, conv_desc_, top_desc_,
        algo_, &cudnn_ws_nbytes_));
-    LOG(INFO) << "CuDNN algorithm: " << algo_;
-    LOG(INFO) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
+    VLOG(1) << "CuDNN algorithm: " << algo_;
+    VLOG(1) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
  }

  // Now, actually run the computation.
@@ -346,7 +346,7 @@ bool CudnnConvGradientOp<T>::RunOnDevice() {
    bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
    bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
  } else if (exhaustive_search_) {
-    LOG(INFO) << "CUDNN Convolution bwd: doing exhaustive search.";
+    VLOG(1) << "CUDNN Convolution bwd: doing exhaustive search.";
    // When we do an exhaustive search, we will ignore the workspace size
    // limit and simply go for the fastest algorithm. If you happen to run
    // out of memory later, you will be on your own...
@@ -416,9 +416,9 @@ bool CudnnConvGradientOp<T>::RunOnDevice() {
        bwd_data_algo_, &bwd_data_ws_size));
    cudnn_ws_nbytes_ = std::max(bwd_filter_ws_size, bwd_data_ws_size);

-    LOG(INFO) << "CuDNN bwd algorithm: " << bwd_filter_algo_ << ", "
+    VLOG(1) << "CuDNN bwd algorithm: " << bwd_filter_algo_ << ", "
            << bwd_data_algo_;
-    LOG(INFO) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
+    VLOG(1) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
  }

  // Now, actually run the computation.

caffe2/operators/conv_transpose_op_cudnn.cc (new file, 579 lines)

@@ -0,0 +1,579 @@
#include "caffe2/core/common_cudnn.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/operators/conv_transpose_op.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
// Earlier in the days Caffe sets the default cudnn workspace to 8MB. We bump
|
||||
// it up to 64MB in Caffe2, as this enables the use of Winograd in many cases,
|
||||
// something very beneficial to more recent CNN models.
|
||||
static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = 64 * 1024 * 1024;
|
||||
|
||||
// Manually specified number of algorithms implemented in CuDNN.
|
||||
// This does not have any performance implications, as we will always find the
|
||||
// fastest algorithm; setting them to the right number of algorithms will enable
|
||||
// us to best report the statistics when doing an exhaustive search, though.
|
||||
static constexpr size_t kNUM_CUDNN_FWD_ALGS = 7;
|
||||
static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = 4;
|
||||
static constexpr size_t kNUM_CUDNN_BWD_DATA_ALGS = 5;
|
||||
|
||||
namespace {
|
||||
template <typename ArrayOfcudnnConvolutionAlgoPerf_t>
|
||||
inline void LogCuDNNPerfStats(
|
||||
const ArrayOfcudnnConvolutionAlgoPerf_t& perf_stat,
|
||||
int returned_algo_count) {
|
||||
LOG(INFO) << "Perf result: (algo: stat, time, memory)";
|
||||
for (int i = 0; i < returned_algo_count; ++i) {
|
||||
const auto& stat = perf_stat[i];
|
||||
LOG(INFO) << stat.algo << ": " << stat.status << " " << stat.time << " "
|
||||
<< stat.memory;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
class CudnnConvTransposeOpBase : public ConvTransposeUnpoolBase<CUDAContext> {
|
||||
public:
|
||||
CudnnConvTransposeOpBase(const OperatorDef& operator_def, Workspace* ws)
|
||||
: ConvTransposeUnpoolBase<CUDAContext>(operator_def, ws),
|
||||
cudnn_wrapper_(&context_),
|
||||
cudnn_ws_nbytes_limit_(OperatorBase::GetSingleArgument<size_t>(
|
||||
"ws_nbytes_limit",
|
||||
kCONV_CUDNN_WORKSPACE_LIMIT_BYTES)),
|
||||
exhaustive_search_(
|
||||
OperatorBase::GetSingleArgument<int>("exhaustive_search", 0)),
|
||||
deterministic_(
|
||||
OperatorBase::GetSingleArgument<int>("deterministic", 0)),
|
||||
cudnn_state_(OperatorBase::GetSingleArgument<int>("cudnn_state", 0)) {
|
||||
CHECK(!deterministic_ || !exhaustive_search_);
|
||||
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bottom_desc_));
|
||||
CUDNN_CHECK(cudnnCreateFilterDescriptor(&filter_desc_));
|
||||
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_));
|
||||
CUDNN_CHECK(cudnnCreateTensorDescriptor(&top_desc_));
|
||||
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc_));
|
||||
}
|
||||
|
||||
~CudnnConvTransposeOpBase() {
|
||||
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bottom_desc_));
|
||||
CUDNN_CHECK(cudnnDestroyFilterDescriptor(filter_desc_));
|
||||
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc_));
|
||||
CUDNN_CHECK(cudnnDestroyTensorDescriptor(top_desc_));
|
||||
CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc_));
|
||||
}
|
||||
|
||||
protected:
|
||||
vector<TIndex> cudnn_input_dims_;
|
||||
vector<TIndex> cudnn_filter_dims_;
|
||||
|
||||
CuDNNWrapper cudnn_wrapper_;
|
||||
cudnnTensorDescriptor_t bottom_desc_;
|
||||
cudnnFilterDescriptor_t filter_desc_;
|
||||
cudnnTensorDescriptor_t bias_desc_;
|
||||
cudnnTensorDescriptor_t top_desc_;
|
||||
cudnnConvolutionDescriptor_t conv_desc_;
|
||||
const size_t cudnn_ws_nbytes_limit_;
|
||||
size_t cudnn_ws_nbytes_;
|
||||
bool exhaustive_search_;
|
||||
bool deterministic_;
|
||||
size_t cudnn_state_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class CudnnConvTransposeOp final : public CudnnConvTransposeOpBase {
|
||||
public:
|
||||
CudnnConvTransposeOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: CudnnConvTransposeOpBase(operator_def, ws) {}
|
||||
|
||||
~CudnnConvTransposeOp() {}
|
||||
|
||||
bool RunOnDevice() override;
|
||||
|
||||
private:
|
||||
cudnnConvolutionBwdDataAlgo_t bwd_data_algo_;
|
||||
// Input: X, W, b
|
||||
// Output: Y
|
||||
INPUT_TAGS(INPUT, FILTER, BIAS);
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class CudnnConvTransposeGradientOp final : public CudnnConvTransposeOpBase {
|
||||
public:
|
||||
CudnnConvTransposeGradientOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: CudnnConvTransposeOpBase(operator_def, ws) {}
|
||||
|
||||
~CudnnConvTransposeGradientOp() {}
|
||||
|
||||
bool RunOnDevice() override;
|
||||
|
||||
private:
|
||||
cudnnConvolutionFwdAlgo_t algo_;
|
||||
cudnnConvolutionBwdFilterAlgo_t bwd_filter_algo_;
|
||||
// input: X, W, dY
|
||||
// output: dW, db, and optionally dX
|
||||
INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
|
||||
OUTPUT_TAGS(FILTER_GRAD, BIAS_GRAD, INPUT_GRAD);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
bool CudnnConvTransposeOp<T>::RunOnDevice() {
|
||||
auto& X = Input(INPUT);
|
||||
auto& filter = Input(FILTER);
|
||||
auto& bias = Input(BIAS);
|
||||
auto* Y = Output(0);
|
||||
int C = 0;
|
||||
switch (order_) {
|
||||
case StorageOrder::NHWC:
|
||||
C = filter.dim32(3);
|
||||
break;
|
||||
case StorageOrder::NCHW:
|
||||
C = filter.dim32(1);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||
}
|
||||
ConvTransposeUnpoolBase<CUDAContext>::SetOutputSize(X, Y, C);
|
||||
|
||||
int N = 0, M = 0, H = 0, W = 0, H_out = 0, W_out = 0;
|
||||
switch (order_) {
|
||||
case StorageOrder::NHWC:
|
||||
N = X.dim32(0);
|
||||
H = X.dim32(1);
|
||||
W = X.dim32(2);
|
||||
M = X.dim32(3);
|
||||
H_out = Y->dim32(1);
|
||||
W_out = Y->dim32(2);
|
||||
DCHECK_EQ(filter.dim32(1), kernel_h_);
|
||||
DCHECK_EQ(filter.dim32(1), kernel_h_);
|
||||
DCHECK_EQ(filter.dim32(2), kernel_w_);
|
||||
DCHECK_EQ(filter.dim32(3), C);
|
||||
break;
|
||||
case StorageOrder::NCHW:
|
||||
N = X.dim32(0);
|
||||
M = X.dim32(1);
|
||||
H = X.dim32(2);
|
||||
W = X.dim32(3);
|
||||
H_out = Y->dim32(2);
|
||||
W_out = Y->dim32(3);
|
||||
DCHECK_EQ(filter.dim32(1), C);
|
||||
DCHECK_EQ(filter.dim32(2), kernel_h_);
|
||||
DCHECK_EQ(filter.dim32(3), kernel_w_);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||
}
|
||||
|
||||
DCHECK_EQ(bias.ndim(), 1);
|
||||
DCHECK_EQ(bias.dim32(0), C);
|
||||
|
||||
// Set up the cudnn algorithms & workspace if necessary
|
||||
bool input_changed = (X.dims() != cudnn_input_dims_);
|
||||
bool filter_changed = (filter.dims() != cudnn_filter_dims_);
|
||||
|
||||
if (input_changed || filter_changed) {
|
||||
VLOG(1) << "Changing the cudnn descriptor configurations.";
|
||||
if (input_changed) {
|
||||
cudnn_input_dims_ = X.dims();
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
bottom_desc_,
|
||||
GetCudnnTensorFormat(order_),
|
||||
cudnnTypeWrapper<T>::type,
|
||||
N,
|
||||
M,
|
||||
H,
|
||||
W));
|
||||
}
|
||||
if (filter_changed) {
|
||||
cudnn_filter_dims_ = filter.dims();
|
||||
CUDNN_CHECK(cudnnSetFilter4dDescriptor(
|
||||
filter_desc_,
|
||||
cudnnTypeWrapper<T>::type,
|
||||
GetCudnnTensorFormat(order_),
|
||||
M,
|
||||
C,
|
||||
kernel_h_,
|
||||
kernel_w_));
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
bias_desc_,
|
||||
GetCudnnTensorFormat(order_),
|
||||
cudnnTypeWrapper<T>::type,
|
||||
1,
|
||||
C,
|
||||
1,
|
||||
1));
|
||||
}
|
||||
// Set the output
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
top_desc_,
|
||||
GetCudnnTensorFormat(order_),
|
||||
cudnnTypeWrapper<T>::type,
|
||||
N,
|
||||
C,
|
||||
H_out,
|
||||
W_out));
|
||||
// Set the convolution descriptor
|
||||
CHECK_EQ(pad_t_, pad_b_)
|
||||
<< "The current padding scheme leads to unequal padding on the top and "
|
||||
"bottom, which is not supported by cudnn.";
|
||||
CHECK_EQ(pad_l_, pad_r_)
|
||||
<< "The current padding scheme leads to unequal padding on the left "
|
||||
"and right, which is not supported by cudnn.";
|
||||
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
|
||||
conv_desc_,
|
||||
pad_t_,
|
||||
pad_l_,
|
||||
stride_h_,
|
||||
stride_w_,
|
||||
1,
|
||||
1,
|
||||
CUDNN_CROSS_CORRELATION));
|
||||
if (deterministic_) {
|
||||
bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
|
||||
} else if (exhaustive_search_) {
|
||||
int returned_algo_count;
|
||||
std::array<cudnnConvolutionBwdDataAlgoPerf_t, kNUM_CUDNN_BWD_DATA_ALGS>
|
||||
data_perf_stat;
|
||||
cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
|
||||
state->workspace().reset();
|
||||
CUDNN_CHECK(cudnnFindConvolutionBackwardDataAlgorithm(
|
||||
state->cudnn_handle(),
|
||||
filter_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
top_desc_,
|
||||
kNUM_CUDNN_BWD_DATA_ALGS,
|
||||
&returned_algo_count,
|
||||
data_perf_stat.data()));
|
||||
});
|
||||
|
||||
LogCuDNNPerfStats(data_perf_stat, returned_algo_count);
|
||||
bwd_data_algo_ = data_perf_stat[0].algo;
|
||||
} else {
|
||||
CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
filter_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
top_desc_,
|
||||
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
|
||||
cudnn_ws_nbytes_limit_,
|
||||
&bwd_data_algo_));
|
||||
}
|
||||
|
||||
size_t bwd_data_ws_size;
|
||||
CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
filter_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
top_desc_,
|
||||
bwd_data_algo_,
|
||||
&bwd_data_ws_size));
|
||||
cudnn_ws_nbytes_ = bwd_data_ws_size;
|
||||
LOG(INFO) << "CuDNN algorithm: " << bwd_data_algo_;
|
||||
LOG(INFO) << "CuDNN workspace size: " << bwd_data_ws_size;
|
||||
}
|
||||
|
||||
// Now, actually run the computation.
|
||||
// Filter
|
||||
cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
|
||||
CUDNN_CHECK(cudnnConvolutionBackwardData(
|
||||
state->cudnn_handle(),
|
||||
cudnnTypeWrapper<T>::kOne(),
|
||||
filter_desc_,
|
||||
filter.template data<T>(),
|
||||
bottom_desc_,
|
||||
X.template data<T>(),
|
||||
conv_desc_,
|
||||
bwd_data_algo_,
|
||||
state->workspace().get(cudnn_ws_nbytes_),
|
||||
cudnn_ws_nbytes_,
|
||||
cudnnTypeWrapper<T>::kZero(),
|
||||
top_desc_,
|
||||
Y->template mutable_data<T>()));
|
||||
});
|
||||
// Bias
|
||||
CUDNN_CHECK(cudnnAddTensor(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
cudnnTypeWrapper<T>::kOne(),
|
||||
bias_desc_,
|
||||
bias.template data<T>(),
|
||||
cudnnTypeWrapper<T>::kOne(),
|
||||
top_desc_,
|
||||
Y->template mutable_data<T>()));
|
||||
// Done.
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO(Yangqing): a lot of the function contents are very similar. Consider
|
||||
// consolidating them.
|
||||
template <typename T>
|
||||
bool CudnnConvTransposeGradientOp<T>::RunOnDevice() {
|
||||
auto& X = Input(INPUT);
|
||||
auto& filter = Input(FILTER);
|
||||
auto& dY = Input(OUTPUT_GRAD);
|
||||
auto* dfilter = Output(FILTER_GRAD);
|
||||
auto* dbias = Output(BIAS_GRAD);
|
||||
DCHECK_EQ(X.ndim(), 4);
|
||||
DCHECK_EQ(filter.ndim(), 4);
|
||||
auto* Y = Output(0);
|
||||
int C = 0;
|
||||
switch (order_) {
|
||||
case StorageOrder::NHWC:
|
||||
C = filter.dim32(3);
|
||||
break;
|
||||
case StorageOrder::NCHW:
|
||||
C = filter.dim32(1);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||
}
|
||||
ConvTransposeUnpoolBase<CUDAContext>::SetOutputSize(X, Y, C);
|
||||
|
||||
int N = 0, M = 0, H = 0, W = 0, H_out = 0, W_out = 0;
|
||||
switch (order_) {
|
||||
case StorageOrder::NHWC:
|
||||
N = X.dim32(0);
|
||||
H = X.dim32(1);
|
||||
W = X.dim32(2);
|
||||
M = X.dim32(3);
|
||||
H_out = dY.dim32(1);
|
||||
W_out = dY.dim32(2);
|
||||
DCHECK_EQ(filter.dim32(1), kernel_h_);
|
||||
DCHECK_EQ(filter.dim32(1), kernel_h_);
|
||||
DCHECK_EQ(filter.dim32(2), kernel_w_);
|
||||
DCHECK_EQ(filter.dim32(3), C);
|
||||
break;
|
||||
case StorageOrder::NCHW:
|
||||
N = X.dim32(0);
|
||||
M = X.dim32(1);
|
||||
H = X.dim32(2);
|
||||
W = X.dim32(3);
|
||||
H_out = dY.dim32(2);
|
||||
W_out = dY.dim32(3);
|
||||
DCHECK_EQ(filter.dim32(1), C);
|
||||
DCHECK_EQ(filter.dim32(2), kernel_h_);
|
||||
DCHECK_EQ(filter.dim32(3), kernel_w_);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown storage order: " << order_;
|
||||
}
|
||||
// Since we only handle LegacyPadding::NOTSET, we don't need to
|
||||
// compute padding.
|
||||
dfilter->ResizeLike(filter);
|
||||
dbias->Resize(C);
|
||||
|
||||
// Set up the cudnn algorithms & workspace if necessary
|
||||
bool input_changed = (X.dims() != cudnn_input_dims_);
|
||||
bool filter_changed = (filter.dims() != cudnn_filter_dims_);
|
||||
if (input_changed || filter_changed) {
|
||||
VLOG(1) << "Changing the cudnn descriptor configurations.";
|
||||
if (input_changed) {
|
||||
cudnn_input_dims_ = X.dims();
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
bottom_desc_,
|
||||
GetCudnnTensorFormat(order_),
|
||||
cudnnTypeWrapper<T>::type,
|
||||
N,
|
||||
M,
|
||||
H,
|
||||
W));
|
||||
}
|
||||
if (filter_changed) {
|
||||
cudnn_filter_dims_ = filter.dims();
|
||||
CUDNN_CHECK(cudnnSetFilter4dDescriptor(
|
||||
filter_desc_,
|
||||
cudnnTypeWrapper<T>::type,
|
||||
GetCudnnTensorFormat(order_),
|
||||
M,
|
||||
C,
|
||||
kernel_h_,
|
||||
kernel_w_));
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
bias_desc_,
|
||||
GetCudnnTensorFormat(order_),
|
||||
cudnnTypeWrapper<T>::type,
|
||||
1,
|
||||
C,
|
||||
1,
|
||||
1));
|
||||
}
|
||||
// Set the output
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptor(
|
||||
top_desc_,
|
||||
GetCudnnTensorFormat(order_),
|
||||
cudnnTypeWrapper<T>::type,
|
||||
N,
|
||||
C,
|
||||
H_out,
|
||||
W_out));
|
||||
// Set the convolution descriptor
|
||||
CHECK_EQ(pad_t_, pad_b_)
|
||||
<< "The current padding scheme leads to unequal padding on the top and "
|
||||
"bottom, which is not supported by cudnn.";
|
||||
CHECK_EQ(pad_l_, pad_r_)
|
||||
<< "The current padding scheme leads to unequal padding on the left "
|
||||
"and right, which is not supported by cudnn.";
|
||||
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
|
||||
conv_desc_,
|
||||
pad_t_,
|
||||
pad_l_,
|
||||
stride_h_,
|
||||
stride_w_,
|
||||
1,
|
||||
1,
|
||||
CUDNN_CROSS_CORRELATION));
|
||||
// Set the workspace
|
||||
|
||||
size_t bwd_filter_ws_size, fwd_ws_size;
|
||||
|
||||
if (deterministic_) {
|
||||
algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
|
||||
bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
|
||||
} else if (exhaustive_search_) {
|
||||
LOG(INFO) << "CUDNN Convolution bwd: doing exhaustive search.";
|
||||
// When we do an exhaustive search, we will ignore the workspace size
|
||||
// limit and simply go for the fastest algorithm. If you happen to run
|
||||
// out of memory later, you will be on your own...
|
||||
int returned_algo_count;
|
||||
// We clean up the current workspace memory so that the forward algorithm
|
||||
// is free to allocate memory.
|
||||
// Actually run the search.
|
||||
std::
|
||||
array<cudnnConvolutionBwdFilterAlgoPerf_t, kNUM_CUDNN_BWD_FILTER_ALGS>
|
||||
filter_perf_stat;
|
||||
|
||||
cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
|
||||
state->workspace().reset();
|
||||
CUDNN_CHECK(cudnnFindConvolutionBackwardFilterAlgorithm(
|
||||
state->cudnn_handle(),
|
||||
top_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
filter_desc_,
|
||||
kNUM_CUDNN_BWD_FILTER_ALGS,
|
||||
&returned_algo_count,
|
||||
filter_perf_stat.data()));
|
||||
});
|
||||
LogCuDNNPerfStats(filter_perf_stat, returned_algo_count);
|
||||
bwd_filter_algo_ = filter_perf_stat[0].algo;
|
||||
|
||||
std::array<cudnnConvolutionFwdAlgoPerf_t, kNUM_CUDNN_FWD_ALGS>
|
||||
fwd_perf_stat;
|
||||
cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
|
||||
state->workspace().reset();
|
||||
CUDNN_CHECK(cudnnFindConvolutionForwardAlgorithm(
|
||||
state->cudnn_handle(),
|
||||
top_desc_,
|
||||
filter_desc_,
|
||||
conv_desc_,
|
||||
bottom_desc_,
|
||||
kNUM_CUDNN_BWD_DATA_ALGS,
|
||||
&returned_algo_count,
|
||||
fwd_perf_stat.data()));
|
||||
});
|
||||
|
||||
LogCuDNNPerfStats(fwd_perf_stat, returned_algo_count);
|
||||
algo_ = fwd_perf_stat[0].algo;
|
||||
} else {
|
||||
// choose backward algorithm for filter
|
||||
CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
top_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
filter_desc_,
|
||||
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
|
||||
cudnn_ws_nbytes_limit_,
|
||||
&bwd_filter_algo_));
|
||||
// choose backward algo for data
|
||||
CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
top_desc_,
|
||||
filter_desc_,
|
||||
conv_desc_,
|
||||
bottom_desc_,
|
||||
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
|
||||
cudnn_ws_nbytes_limit_,
|
||||
&algo_));
|
||||
}
|
||||
// get workspace for backwards filter algorithm
|
||||
CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
top_desc_,
|
||||
bottom_desc_,
|
||||
conv_desc_,
|
||||
filter_desc_,
|
||||
bwd_filter_algo_,
|
||||
&bwd_filter_ws_size));
|
||||
// get workspace for backwards data algorithm
|
||||
CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
top_desc_,
|
||||
filter_desc_,
|
||||
conv_desc_,
|
||||
bottom_desc_,
|
||||
algo_,
|
||||
&fwd_ws_size));
|
||||
cudnn_ws_nbytes_ = std::max(bwd_filter_ws_size, fwd_ws_size);
|
||||
|
||||
LOG(INFO) << "CuDNN bwd algorithm: " << bwd_filter_algo_ << ", " << algo_;
|
||||
LOG(INFO) << "CuDNN workspace size: " << cudnn_ws_nbytes_;
|
||||
}
|
||||
|
||||
// Now, actually run the computation.
|
||||
CUDNN_CHECK(cudnnConvolutionBackwardBias(
|
||||
cudnn_wrapper_.inline_cudnn_handle(),
|
||||
cudnnTypeWrapper<T>::kOne(),
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
cudnnTypeWrapper<T>::kZero(),
|
||||
bias_desc_,
|
||||
dbias->template mutable_data<T>()));
|
||||
|
||||
cudnn_wrapper_.with_cudnn_state(cudnn_state_, [&](CuDNNState* state) {
|
||||
CUDNN_CHECK(cudnnConvolutionBackwardFilter(
|
||||
state->cudnn_handle(),
|
||||
cudnnTypeWrapper<T>::kOne(),
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
bottom_desc_,
|
||||
X.template data<T>(),
|
||||
conv_desc_,
|
||||
bwd_filter_algo_,
|
||||
state->workspace().get(cudnn_ws_nbytes_),
|
||||
cudnn_ws_nbytes_,
|
||||
cudnnTypeWrapper<T>::kZero(),
|
||||
filter_desc_,
|
||||
dfilter->template mutable_data<T>()));
|
||||
if (OutputSize() == 3) {
|
||||
// Compute the gradient w.r.t. the input.
|
||||
auto* dX = Output(INPUT_GRAD);
|
||||
dX->ResizeLike(X);
|
||||
CUDNN_CHECK(cudnnConvolutionForward(
|
||||
state->cudnn_handle(),
|
||||
cudnnTypeWrapper<T>::kOne(),
|
||||
top_desc_,
|
||||
dY.template data<T>(),
|
||||
filter_desc_,
|
||||
filter.template data<T>(),
|
||||
conv_desc_,
|
||||
algo_,
|
||||
state->workspace().get(cudnn_ws_nbytes_),
|
||||
cudnn_ws_nbytes_,
|
||||
cudnnTypeWrapper<T>::kZero(),
|
||||
bottom_desc_,
|
||||
dX->template mutable_data<T>()));
|
||||
}
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
REGISTER_CUDNN_OPERATOR(ConvTranspose, CudnnConvTransposeOp<float>);
|
||||
REGISTER_CUDNN_OPERATOR(
|
||||
ConvTransposeGradient,
|
||||
CudnnConvTransposeGradientOp<float>);
|
||||
|
||||
} // namespace caffe2
|
||||
|
|
@@ -118,9 +118,13 @@ class ConvTransposeUnpoolBase : public Operator<Context> {
    return true;
  }

-  virtual bool RunOnDeviceWithOrderNCHW() = 0;
+  virtual bool RunOnDeviceWithOrderNCHW() {
+    CAFFE_THROW("Not implemented");
+  }

-  virtual bool RunOnDeviceWithOrderNHWC() = 0;
+  virtual bool RunOnDeviceWithOrderNHWC() {
+    CAFFE_THROW("Not implemented");
+  }

  virtual ~ConvTransposeUnpoolBase() {}

caffe2/operators/counter_ops.cc (new file, 46 lines)

@@ -0,0 +1,46 @@
#include "counter_ops.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
|
||||
REGISTER_CPU_OPERATOR(CreateCounter, CreateCounterOp<int32_t>);
|
||||
REGISTER_CPU_OPERATOR(ResetCounter, ResetCounterOp<int32_t>);
|
||||
REGISTER_CPU_OPERATOR(CountDown, CountDownOp<int32_t>);
|
||||
|
||||
OPERATOR_SCHEMA(CreateCounter)
|
||||
.NumInputs(0)
|
||||
.NumOutputs(1)
|
||||
.SetDoc(R"DOC(
|
||||
Creates a count-down counter with initial value specified by the 'init_count'
|
||||
argument.
|
||||
)DOC")
|
||||
.Output(0, "counter", "A blob pointing to an instance of a new counter.")
|
||||
.Arg("init_count", "Initial count for the counter, must be >= 0.");
|
||||
|
||||
OPERATOR_SCHEMA(ResetCounter)
|
||||
.NumInputs(1)
|
||||
.NumOutputs(0)
|
||||
.SetDoc(R"DOC(
|
||||
Resets a count-down counter with initial value specified by the 'init_count'
|
||||
argument.
|
||||
)DOC")
|
||||
.Input(0, "counter", "A blob pointing to an instance of a new counter.")
|
||||
.Arg("init_count", "Resets counter to this value, must be >= 0.");
|
||||
|
||||
OPERATOR_SCHEMA(CountDown)
|
||||
.NumInputs(1)
|
||||
.NumOutputs(1)
|
||||
.SetDoc(R"DOC(
|
||||
If the internal count value > 0, decreases count value by 1 and outputs false,
|
||||
otherwise outputs true.
|
||||
)DOC")
|
||||
.Input(0, "counter", "A blob pointing to an instance of a counter.")
|
||||
.Output(0, "should_stop", "false unless the internal count is zero.");

SHOULD_NOT_DO_GRADIENT(CreateCounter);
SHOULD_NOT_DO_GRADIENT(ResetCounter);
SHOULD_NOT_DO_GRADIENT(CountDown);

} // namespace

} // namespace caffe2
caffe2/operators/counter_ops.h (new file, 89 lines)

@@ -0,0 +1,89 @@
#ifndef CAFFE2_OPERATORS_COUNTER_OPS_H
#define CAFFE2_OPERATORS_COUNTER_OPS_H

#include <atomic>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"

namespace caffe2 {
namespace {
template <typename T>
class Counter {
 public:
  explicit Counter(T count) : count_(count) {}
  bool CountDown() {
    if (count_ > 0) {
      --count_;
      return false;
    }
    return true;
  }

  void reset(T init_count) {
    count_ = init_count;
  }

 private:
  std::atomic<T> count_;
};
}  // namespace

template <typename T, class Context = CPUContext>
class CreateCounterOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  CreateCounterOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        init_count_(OperatorBase::GetSingleArgument<T>("init_count", 0)) {
    CHECK_LE(0, init_count_) << "negative init_count is not permitted.";
  }

  bool RunOnDevice() override {
    *OperatorBase::Output<std::unique_ptr<Counter<T>>>(0) =
        std::unique_ptr<Counter<T>>(new Counter<T>(init_count_));
    return true;
  }

 private:
  T init_count_ = 0;
};

template <typename T, class Context = CPUContext>
class ResetCounterOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  ResetCounterOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        init_count_(OperatorBase::GetSingleArgument<T>("init_count", 0)) {
    CHECK_LE(0, init_count_) << "negative init_count is not permitted.";
  }

  bool RunOnDevice() override {
    auto& counterPtr = OperatorBase::Input<std::unique_ptr<Counter<T>>>(0);
    counterPtr->reset(init_count_);
    return true;
  }

 private:
  T init_count_;
};

template <typename T, class Context = CPUContext>
class CountDownOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  CountDownOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& counterPtr = OperatorBase::Input<std::unique_ptr<Counter<T>>>(0);
    auto* output = Output(0);
    output->Resize(std::vector<int>{});
    *output->template mutable_data<bool>() = counterPtr->CountDown();
    return true;
  }
};
}  // namespace caffe2
#endif  // CAFFE2_OPERATORS_COUNTER_OPS_H
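A standalone sketch of the counter semantics above (plain C++, nothing Caffe2-specific assumed; MiniCounter is a hypothetical stand-in): CountDown() returns false while budget remains, so a loop driven by it runs exactly init_count times.

#include <atomic>
#include <iostream>

template <typename T>
class MiniCounter {  // mirrors Counter<T> above
 public:
  explicit MiniCounter(T count) : count_(count) {}
  bool CountDown() {
    if (count_ > 0) {
      --count_;
      return false;
    }
    return true;  // exhausted: this plays the role of "should_stop"
  }

 private:
  std::atomic<T> count_;
};

int main() {
  MiniCounter<int> counter(3);  // plays the role of init_count = 3
  int iterations = 0;
  while (!counter.CountDown()) {
    ++iterations;
  }
  std::cout << iterations << "\n";  // prints 3
}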
@ -2,6 +2,17 @@

namespace caffe2 {

namespace {

inline float sigmoid_xent_forward(float lgt, float tgt) {
  return lgt * (tgt - (lgt >= 0)) - log(1 + exp(lgt - 2 * lgt * (lgt >= 0)));
}

inline float sigmoid_xent_backward(float lgt, float tgt) {
  return tgt - 1. / (1. + exp(-lgt));
}
}
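As a reference note (not part of the diff), sigmoid_xent_forward is the numerically stable form of the per-element sigmoid cross entropy. With logit x and target t:

\[
t\log\sigma(x) + (1-t)\log\bigl(1-\sigma(x)\bigr)
    = x\,t - \max(x, 0) - \log\bigl(1 + e^{-|x|}\bigr)
\]

and since x - 2x[x >= 0] = -|x|, the exponent passed to exp in the code is never positive, so the computation cannot overflow.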

template <>
bool LabelCrossEntropyOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);

@ -26,6 +37,68 @@ bool LabelCrossEntropyOp<float, CPUContext>::RunOnDevice() {
  return true;
}
template <>
bool SigmoidCrossEntropyWithLogitsOp<float, CPUContext>::RunOnDevice() {
  auto& logits = Input(0);
  auto& targets = Input(1);
  CAFFE_ENFORCE(logits.dims() == targets.dims());
  const auto inner_size = logits.ndim() > 0 ? logits.dims().back() : 1;
  const auto outer_size = logits.size() / inner_size;

  auto* out = Output(0);
  if (logits.ndim() == 0) {
    out->Resize(std::vector<TIndex>{});
  } else {
    std::vector<TIndex> dims(logits.dims().begin(), logits.dims().end() - 1);
    out->Resize(dims);
  }
  auto* out_ptr = out->mutable_data<float>();

  auto* logits_ptr = logits.data<float>();
  auto* targets_ptr = targets.data<float>();

  auto in_idx = 0;
  for (int i = 0; i < outer_size; ++i) {
    float value = 0;
    for (int j = 0; j < inner_size; ++j) {
      value += sigmoid_xent_forward(logits_ptr[in_idx], targets_ptr[in_idx]);
      ++in_idx;
    }
    out_ptr[i] = -value / inner_size;
  }
  return true;
}

template <>
bool SigmoidCrossEntropyWithLogitsGradientOp<float, CPUContext>::RunOnDevice() {
  auto& g = Input(0);
  auto& logits = Input(1);
  auto& targets = Input(2);
  CAFFE_ENFORCE(logits.dims() == targets.dims());
  const auto inner_size = logits.ndim() > 0 ? logits.dims().back() : 1;
  const auto outer_size = logits.size() / inner_size;
  CAFFE_ENFORCE(g.size() == outer_size);

  auto* out = Output(0);
  out->ResizeLike(logits);
  auto* out_ptr = out->mutable_data<float>();

  auto* logits_ptr = logits.data<float>();
  auto* targets_ptr = targets.data<float>();
  auto* g_ptr = g.data<float>();

  auto in_idx = 0;
  for (int i = 0; i < outer_size; ++i) {
    auto g_factor = -g_ptr[i] / inner_size;
    for (int j = 0; j < inner_size; ++j) {
      out_ptr[in_idx] = g_factor *
          sigmoid_xent_backward(logits_ptr[in_idx], targets_ptr[in_idx]);
      ++in_idx;
    }
  }
  return true;
}

template <>
bool LabelCrossEntropyGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);

@ -129,6 +202,13 @@ REGISTER_CPU_OPERATOR(MakeTwoClass,
REGISTER_CPU_OPERATOR(MakeTwoClassGradient,
    MakeTwoClassGradientOp<float, CPUContext>);

REGISTER_CPU_OPERATOR(
    SigmoidCrossEntropyWithLogits,
    SigmoidCrossEntropyWithLogitsOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    SigmoidCrossEntropyWithLogitsGradient,
    SigmoidCrossEntropyWithLogitsGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(MakeTwoClass)
    .NumInputs(1)
    .NumOutputs(1)

@ -145,6 +225,22 @@ OPERATOR_SCHEMA(MakeTwoClassGradient)
    .NumInputs(1)
    .NumOutputs(1);

OPERATOR_SCHEMA(SigmoidCrossEntropyWithLogits)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given two matrices logits and targets, of the same shape
(batch_size, num_classes), computes the sigmoid cross entropy between the two.
Returns a tensor of shape (batch_size,) of losses for each example.
)DOC")
    .Input(0, "logits", "matrix of logits for each example and class.")
    .Input(1, "targets", "matrix of targets, same shape as logits.")
    .Output(0, "xentropy", "Vector with the total xentropy for each example.");

OPERATOR_SCHEMA(SigmoidCrossEntropyWithLogitsGradient)
    .NumInputs(3)
    .NumOutputs(1);

struct GetMakeTwoClassGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {

@ -156,5 +252,20 @@ struct GetMakeTwoClassGradient : public GradientMakerBase {
  }
};
REGISTER_GRADIENT(MakeTwoClass, GetMakeTwoClassGradient);

struct GetSigmoidCrossEntropyWithLogitsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SigmoidCrossEntropyWithLogitsGradient",
        "",
        vector<string>{GO(0), I(0), I(1)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(
    SigmoidCrossEntropyWithLogits,
    GetSigmoidCrossEntropyWithLogitsGradient);

}  // namespace
}  // namespace caffe2
@ -62,6 +62,22 @@ class MakeTwoClassGradientOp final
  // Output: dX
};

template <typename T, class Context>
class SigmoidCrossEntropyWithLogitsOp final : public Operator<Context> {
 public:
  USE_SIMPLE_CTOR_DTOR(SigmoidCrossEntropyWithLogitsOp);
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  bool RunOnDevice() override;
};

template <typename T, class Context>
class SigmoidCrossEntropyWithLogitsGradientOp final : public Operator<Context> {
 public:
  USE_SIMPLE_CTOR_DTOR(SigmoidCrossEntropyWithLogitsGradientOp);
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  bool RunOnDevice() override;
};

}  // namespace caffe2

#endif  // CAFFE2_OPERATORS_CROSS_ENTROPY_OP_H_
caffe2/operators/dataset_ops.cc (new file, 734 lines)

@ -0,0 +1,734 @@
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/string_utils.h"

namespace caffe2 {
namespace {

const char kDatasetFieldSeparator = ':';
const char* kDatasetLengthField = "lengths";

// percentage by which to grow the dataset when it needs to be resized
const int kDatasetGrowthPct = 40;

// used for lengths tensors in the dataset
using TLength = int32_t;
// used for all internal dataset operations (offsets, sizes to read, etc.)
using TOffset = int64_t;

/**
 * Provides functionality to iterate across a list of tensors where some
 * of those tensors represent lengths in a hierarchical structure.
 */
class TreeIterator {
 public:
  struct FieldDesc {
    int id;
    int lengthFieldId = -1;
    std::string name;
  };

  explicit TreeIterator(const std::vector<std::string>& fields) {
    // populate field vector and split field names
    fields_.resize(fields.size());
    std::vector<std::vector<std::string>> nameParts(fields_.size());
    for (int i = 0; i < fields.size(); ++i) {
      auto& field = fields_.at(i);
      field.name = fields[i];
      field.id = i;
      field.lengthFieldId = -1;
      nameParts.at(i) = split(kDatasetFieldSeparator, field.name);
    }

    // populate lengthFields
    for (const auto& field : fields_) {
      const auto& parts = nameParts.at(field.id);
      if (!parts.empty() && parts.back() == kDatasetLengthField) {
        lengthFieldIds_.push_back(field.id);
      }
    }

    // find length-field with maximum prefix matching for each field
    for (auto& field : fields_) {
      // by default, we are matching against the root domain
      int maxMatchLevel = 1;
      int maxMatchLengthFieldId = -1;
      for (int j = 0; j < numLengthFields(); ++j) {
        const auto& lenField = lengthField(j);
        // a length field can't have itself as its length field
        if (field.id == lenField.id) {
          continue;
        }
        auto lf = nameParts.at(lenField.id);
        auto lfEnd = lf.end() - 1;
        // check whether this lengthField is a prefix for this field name
        if (std::mismatch(lf.begin(), lfEnd, nameParts.at(field.id).begin())
                .first != lfEnd) {
          continue;
        }
        if (lf.size() > maxMatchLevel) {
          maxMatchLevel = lf.size();
          maxMatchLengthFieldId = j;
        }
      }
      field.lengthFieldId = maxMatchLengthFieldId;
    }

    // check that fields are topologically sorted
    // (no length field depends on a length defined afterwards)
    for (const auto& field : fields_) {
      const auto* lengthField = lengthFieldFor(field);
      CAFFE_ENFORCE(
          (lengthField == nullptr) || (lengthField->id < field.id),
          "Error: Field ",
          field.id,
          " (",
          field.name,
          ") ",
          "depends on a field defined afterwards: ",
          lengthField->id,
          " (",
          lengthField->name,
          ").");
    }
  }

  void advance(
      const std::vector<const TLength*>& lengths,
      std::vector<TOffset>& offsets,
      std::vector<TOffset>& sizes,
      std::vector<TOffset>& limits,
      TOffset num) {
    thread_local std::vector<TOffset> newOffsets;
    CHECK_EQ(lengths.size(), numLengthFields());
    CHECK_EQ(offsets.size(), numOffsetFields());
    sizes.resize(offsets.size());
    newOffsets.resize(offsets.size());
    // first index, top level
    {
      auto limit = limits[0];
      auto offset = offsets[0];
      CAFFE_ENFORCE(limit >= offset, "Tried to advance past end of cursor.");
      TOffset total = std::min(limit - offset, num);
      sizes[0] = total;
      newOffsets[0] = offset + total;
    }
    // child indices
    for (int j = 1; j < numOffsetFields(); ++j) {
      TOffset total = 0;
      int parentOffsetId = offsetFieldIdFor(lengthField(j - 1));
      const TLength* length = lengths[j - 1] + offsets[parentOffsetId];
      for (int k = 0; k < sizes[parentOffsetId]; ++k) {
        total += *(length++);
      }
      auto offset = offsets[j];
      CAFFE_ENFORCE(
          offset + total <= limits[j],
          "Inconsistent field length: ",
          "tried to advance past the end of field ",
          j);
      sizes[j] = total;
      newOffsets[j] = offset + total;
    }
    offsets = newOffsets;
  }

  // Corresponds to the number of fields that have "lengths" as the last
  // part of their name
  int numLengthFields() const {
    return lengthFieldIds_.size();
  }

  // Corresponds to the number of length fields + 1 (for the top-level domain)
  int numOffsetFields() const {
    return numLengthFields() + 1;
  }

  // Get lengthField description for the given field
  const FieldDesc* lengthFieldFor(const FieldDesc& desc) {
    return (desc.lengthFieldId == -1)
        ? nullptr
        : &fields_.at(lengthFieldIds_.at(desc.lengthFieldId));
  }

  // Get lengthField description for the given lengthFieldId, where
  // 0 <= lengthFieldId < numLengthFields()
  const FieldDesc& lengthField(int lengthFieldId) {
    return fields_.at(lengthFieldIds_.at(lengthFieldId));
  }

  // Returns the index into the 'offset' vector for the given field.
  int offsetFieldIdFor(const FieldDesc& fieldDesc) {
    return fieldDesc.lengthFieldId + 1;
  }

  // Returns the field description for all fields.
  const std::vector<FieldDesc>& fields() {
    return fields_;
  }

 private:
  // Description of each field
  std::vector<FieldDesc> fields_;
  // Index into fields_ above for the fields that are lengths.
  std::vector<int> lengthFieldIds_;
};
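To make the hierarchy bookkeeping in advance() concrete, here is a standalone sketch (plain C++, illustrative only) with a single length field: advancing the top level by num rows consumes num entries of the lengths tensor, and their sum is how far the child values field advances.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // lengths of each top-level entry of a "b" domain, as in "b:lengths"
  std::vector<int32_t> b_lengths = {2, 3, 0, 1};
  int64_t top_offset = 0, child_offset = 0;
  const int64_t num = 2;  // top-level rows per batch
  while (top_offset < (int64_t)b_lengths.size()) {
    int64_t take =
        std::min<int64_t>(num, (int64_t)b_lengths.size() - top_offset);
    int64_t child_size = 0;
    for (int64_t k = 0; k < take; ++k) {
      child_size += b_lengths[top_offset + k];  // sum the consumed lengths
    }
    top_offset += take;
    child_offset += child_size;
    std::cout << "top=" << top_offset << " child=" << child_offset << "\n";
  }
  // prints: top=2 child=5, then top=4 child=6
}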

class TreeCursor {
 public:
  explicit TreeCursor(const TreeIterator& iterator) : it(iterator) {}
  std::vector<TOffset> offsets;
  std::mutex mutex_;
  TreeIterator it;
};

class CreateTreeCursorOp : public Operator<CPUContext> {
 public:
  CreateTreeCursorOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        fields_(OperatorBase::GetRepeatedArgument<std::string>("fields")) {}

  bool RunOnDevice() override {
    *OperatorBase::Output<std::unique_ptr<TreeCursor>>(0) =
        std::unique_ptr<TreeCursor>(new TreeCursor(TreeIterator(fields_)));
    return true;
  }

 private:
  std::vector<std::string> fields_;
};

class ResetCursorOp : public Operator<CPUContext> {
 public:
  ResetCursorOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    std::lock_guard<std::mutex> lock(cursor->mutex_);
    cursor->offsets.clear();
    return true;
  }
};

class CheckDatasetConsistencyOp : public Operator<CPUContext> {
 public:
  CheckDatasetConsistencyOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        iterator_(OperatorBase::GetRepeatedArgument<std::string>("fields")) {}

  bool RunOnDevice() override {
    thread_local std::vector<const TLength*> lengths;
    thread_local std::vector<TOffset> limits;
    thread_local std::vector<TOffset> sizes;
    thread_local std::vector<TOffset> offsets;
    CAFFE_ENFORCE(
        InputSize() == iterator_.fields().size(),
        "Invalid number of fields. Expected ",
        iterator_.fields().size(),
        ", got ",
        InputSize());
    sizes.resize(iterator_.numOffsetFields());
    // gather length data
    lengths.resize(iterator_.numLengthFields());
    for (int i = 0; i < lengths.size(); ++i) {
      lengths[i] = Input(iterator_.lengthField(i).id).data<TLength>();
    }
    // gather size limits
    limits.assign(sizes.size(), std::numeric_limits<TOffset>::max());
    for (int i = 0; i < iterator_.fields().size(); ++i) {
      int lengthIdx = iterator_.fields()[i].lengthFieldId + 1;
      TOffset size = (TOffset)Input(i).dims()[0];
      if (limits[lengthIdx] == std::numeric_limits<TOffset>::max()) {
        limits[lengthIdx] = size;
      } else {
        CAFFE_ENFORCE(
            limits[lengthIdx] == size,
            "Inconsistent sizes for fields belonging to the same domain.",
            " Field: ",
            i,
            " (",
            iterator_.fields()[i].name,
            "); Length field index: ",
            lengthIdx,
            "; Previous size: ",
            limits[lengthIdx],
            "; New size: ",
            size);
      }
    }
    // advance to the end
    offsets.assign(sizes.size(), 0);
    iterator_.advance(lengths, offsets, sizes, limits, limits[0]);
    for (int i = 0; i < limits.size(); ++i) {
      CAFFE_ENFORCE(limits[i] == offsets[i]);
    }
    return true;
  }

 private:
  TreeIterator iterator_;
};

class ReadNextBatchOp : public Operator<CPUContext> {
 public:
  ReadNextBatchOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        batchSize_(OperatorBase::GetSingleArgument<int>("batch_size", 1)) {}

  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    CAFFE_ENFORCE(InputSize() == cursor->it.fields().size() + 1);
    thread_local std::vector<const TLength*> lengths;
    thread_local std::vector<TOffset> limits;
    thread_local std::vector<TOffset> sizes;
    thread_local std::vector<TOffset> offsets;
    sizes.resize(cursor->it.numOffsetFields());
    // gather length data
    lengths.resize(cursor->it.numLengthFields());
    for (int i = 0; i < lengths.size(); ++i) {
      lengths[i] = Input(cursor->it.lengthField(i).id + 1).data<int>();
    }
    // gather size limits
    limits.assign(sizes.size(), std::numeric_limits<TOffset>::max());
    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      int lengthFieldIdx = cursor->it.fields()[i].lengthFieldId + 1;
      limits[lengthFieldIdx] =
          std::min(limits[lengthFieldIdx], (TOffset)Input(i + 1).dims()[0]);
    }
    // advance cursor
    {
      std::lock_guard<std::mutex> lock(cursor->mutex_);
      if (cursor->offsets.empty()) {
        cursor->offsets.assign(sizes.size(), 0);
      }
      offsets = cursor->offsets;
      cursor->it.advance(lengths, cursor->offsets, sizes, limits, batchSize_);
    }
    // gather data
    thread_local std::vector<TIndex> outDim;
    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1;
      auto size = sizes[lengthIdx];
      auto offset = offsets[lengthIdx];
      auto& in = Input(i + 1);
      auto innerSize = in.size_from_dim(1);
      outDim = in.dims();
      outDim[0] = size;
      auto* out = Output(i);
      out->Resize(outDim);
      if (out->size() == 0) {
        continue;
      }
      void* src =
          (char*)in.raw_data() + offset * innerSize * in.meta().itemsize();
      void* dst = out->raw_mutable_data(in.meta());
      context_.template CopyItems<CPUContext, CPUContext>(
          in.meta(), out->size(), src, dst);
    }
    return true;
  }
  int batchSize_;
};

class ComputeOffsetOp : public Operator<CPUContext> {
 public:
  ComputeOffsetOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    CAFFE_ENFORCE(InputSize() == cursor->it.fields().size() + 1);
    auto* out = Output(0);
    std::vector<const TLength*> lengths;
    std::vector<TOffset> limits;
    std::vector<TOffset> sizes;
    std::vector<TOffset> offsets;
    sizes.resize(cursor->it.numOffsetFields());
    // gather length data
    lengths.resize(cursor->it.numLengthFields());
    for (int i = 0; i < lengths.size(); ++i) {
      lengths[i] = Input(cursor->it.lengthField(i).id + 1).data<int>();
    }
    // gather size limits
    limits.assign(sizes.size(), std::numeric_limits<TOffset>::max());
    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      int lengthFieldIdx = cursor->it.fields()[i].lengthFieldId + 1;
      limits[lengthFieldIdx] =
          std::min(limits[lengthFieldIdx], (TOffset)Input(i + 1).dims()[0]);
    }
    out->Resize(limits.at(0) + 1, sizes.size());
    auto* out_data = out->mutable_data<int64_t>();
    for (int k = 0; k <= limits.at(0); k++) {
      // advance cursor
      if (cursor->offsets.empty()) {
        cursor->offsets.assign(sizes.size(), 0);
      }
      // write output
      std::copy(cursor->offsets.begin(), cursor->offsets.end(), out_data);
      out_data += sizes.size();
      cursor->it.advance(lengths, cursor->offsets, sizes, limits, 1);
    }
    cursor->offsets.assign(sizes.size(), 0); // reset after getting meta info
    return true;
  }
};

class ReadRandomBatchOp : public Operator<CPUContext> {
 public:
  ReadRandomBatchOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        batchSize_(OperatorBase::GetSingleArgument<int>("batch_size", 1)) {}
  bool RunOnDevice() override {
    auto& cursor = OperatorBase::Input<std::unique_ptr<TreeCursor>>(0);
    auto& idxblob = Input(1);
    auto& offsetsmat = Input(2);
    CAFFE_ENFORCE(InputSize() == cursor->it.fields().size() + 3);
    auto idxvec = idxblob.template data<int64_t>();
    auto& offsetdim = offsetsmat.dims();
    // gather data
    thread_local std::vector<TIndex> outDim;
    int64_t idx;
    {
      std::lock_guard<std::mutex> lock(cursor->mutex_);
      cursor->offsets.resize(1);
      idx = cursor->offsets.at(0);
      cursor->offsets.at(0) += batchSize_;
    }

    for (int i = 0; i < cursor->it.fields().size(); ++i) {
      auto lengthIdx = cursor->it.fields()[i].lengthFieldId + 1;
      auto& in = Input(i + 3);
      outDim = in.dims();
      outDim.at(0) = 0;
      auto idxbegin = idx;
      for (int j = 0; j < batchSize_; ++j) {
        if (idx >= idxblob.size()) {
          break;
        }
        CAFFE_ENFORCE(
            (idxvec[idx] + 1) * offsetdim[1] + lengthIdx < offsetsmat.size(),
            "Out of bounds when trying to get elem from offsetsmat");
        auto offsetptr = offsetsmat.template data<TOffset>() +
            idxvec[idx] * offsetdim[1] + lengthIdx;
        auto offset = *offsetptr;
        auto size = *(offsetptr + offsetdim[1]) - offset;
        outDim.at(0) += size; // accumulate over the batch
        idx++;
      }
      idx = idxbegin; // reset
      auto* out = Output(i);
      out->Resize(outDim);
      if (out->size() == 0) {
        continue;
      }
      auto innerSize = in.size_from_dim(1);
      auto dst = static_cast<char*>(out->raw_mutable_data(in.meta()));
      int block_size = in.size() / in.dim(0);
      int block_bytesize = in.nbytes() / in.dim(0);
      int start = 0;
      for (int j = 0; j < batchSize_; ++j) {
        if (idx >= idxblob.size()) {
          break;
        }
        auto offsetptr = offsetsmat.template data<TOffset>() +
            idxvec[idx] * offsetdim[1] + lengthIdx;
        auto offset = *offsetptr;
        auto size = *(offsetptr + offsetdim[1]) - offset;
        // copy data
        void* src =
            (char*)in.raw_data() + offset * innerSize * in.meta().itemsize();
        context_.template CopyItems<CPUContext, CPUContext>(
            in.meta(), size * block_size, src, dst + start * block_bytesize);
        start += size;
        idx++;
      }
      idx = idxbegin; // reset
    }
    return true;
  }
  int batchSize_;
};

template <class Context>
class AppendOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  AppendOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& a = Input(0);
    auto& b = Input(1);
    auto* c = Output(0);
    CAFFE_ENFORCE(b.ndim() >= 1);
    if (a.size() == 0) {
      c->CopyFrom(b);
      return true;
    }
    CAFFE_ENFORCE(&a == c, "First argument must be in-place.");
    CAFFE_ENFORCE(c->ndim() == b.ndim());
    CAFFE_ENFORCE(b.ndim() == c->ndim());
    CAFFE_ENFORCE(a.meta() == b.meta());
    for (int i = 1; i < a.ndim(); ++i) {
      CAFFE_ENFORCE(a.dims()[i] == b.dims()[i]);
    }
    auto oldSize = c->size();
    c->Extend(b.dims()[0], kDatasetGrowthPct, &context_);
    auto* dst = (char*)c->raw_mutable_data() + oldSize * b.meta().itemsize();
    context_.template CopyItems<Context, Context>(
        b.meta(), b.size(), b.raw_data(), dst);
    return true;
  }
};

template <class Context>
class AtomicAppendOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  AtomicAppendOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0);
    const auto numFields = (InputSize() - 1) / 2;
    CAFFE_ENFORCE(OutputSize() == numFields);

    std::lock_guard<std::mutex> guard(*mutex);

    // 1: checks
    for (int i = 0; i < numFields; ++i) {
      auto& a = Input(1 + i);
      auto& b = Input(1 + i + numFields);
      auto* c = Output(i);
      CAFFE_ENFORCE(b.ndim() >= 1);
      if (a.size() == 0) {
        continue;
      }
      CAFFE_ENFORCE(
          (void*)&a == (void*)c, "Appended-to arguments must be in-place.");
      CAFFE_ENFORCE(c->ndim() == b.ndim());
      CAFFE_ENFORCE(b.ndim() == c->ndim());
      CAFFE_ENFORCE(a.meta() == b.meta());
      for (int j = 1; j < a.ndim(); ++j) {
        CAFFE_ENFORCE(a.dims()[j] == b.dims()[j]);
      }
    }

    // 2: copies
    for (int i = 0; i < numFields; ++i) {
      auto& a = Input(1 + i);
      auto& b = Input(1 + i + numFields);
      auto* c = Output(i);
      if (a.size() == 0) {
        c->CopyFrom(b);
        continue;
      }
      auto oldSize = c->size();
      c->Extend(b.dims()[0], kDatasetGrowthPct, &context_);
      auto* dst = (char*)c->raw_mutable_data() + oldSize * b.meta().itemsize();
      context_.template CopyItems<Context, Context>(
          b.meta(), b.size(), b.raw_data(), dst);
    }
    return true;
  }
};

REGISTER_CPU_OPERATOR(CreateTreeCursor, CreateTreeCursorOp);
REGISTER_CPU_OPERATOR(ResetCursor, ResetCursorOp);
REGISTER_CPU_OPERATOR(ReadNextBatch, ReadNextBatchOp);
REGISTER_CPU_OPERATOR(ComputeOffset, ComputeOffsetOp);
REGISTER_CPU_OPERATOR(ReadRandomBatch, ReadRandomBatchOp);
REGISTER_CPU_OPERATOR(CheckDatasetConsistency, CheckDatasetConsistencyOp);
REGISTER_CPU_OPERATOR(Append, AppendOp<CPUContext>);
REGISTER_CPU_OPERATOR(AtomicAppend, AtomicAppendOp<CPUContext>);

OPERATOR_SCHEMA(CreateTreeCursor)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Creates a cursor to iterate through a list of tensors, where some of those
tensors contain the lengths in a nested schema. The schema is determined by
the `fields` argument.

For example, to represent the following schema:

  Struct(
      a=Int(),
      b=List(List(Int)),
      c=List(
          Struct(
             c1=String,
             c2=List(Int),
          ),
      ),
  )

the field list will be:
  [
      "a",
      "b:lengths",
      "b:values:lengths",
      "b:values:values",
      "c:lengths",
      "c:c1",
      "c:c2:lengths",
      "c:c2:values",
  ]

And for the following instance of the struct:

  Struct(
      a=3,
      b=[[4, 5], [6, 7, 8], [], [9]],
      c=[
          Struct(c1='alex', c2=[10, 11]),
          Struct(c1='bob', c2=[12]),
      ],
  )

The values of the fields will be:
  {
      "a": [3],
      "b:lengths": [4],
      "b:values:lengths": [2, 3, 0, 1],
      "b:values:values": [4, 5, 6, 7, 8, 9],
      "c:lengths": [2],
      "c:c1": ["alex", "bob"],
      "c:c2:lengths": [2, 1],
      "c:c2:values": [10, 11, 12],
  }

In general, every field name in the format "{prefix}:lengths" defines a domain
"{prefix}", and every subsequent field in the format "{prefix}:{field}" will
be in that domain, and the length of the domain is provided for each entry of
the parent domain. In the example, "b:lengths" defines a domain of length 4, so
every field under domain "b" will have 4 entries.
The "lengths" field for a given domain must appear before any reference to
that domain.

Returns a pointer to an instance of the Cursor, which keeps the current offset
on each of the domains defined by `fields`. Cursor also ensures thread-safety
such that ReadNextBatch and ResetCursor can be used safely in parallel.

A cursor does not contain data per se, so calls to ReadNextBatch actually need
to pass a list of blobs containing the data to read for each one of the fields.
)DOC")
    .Output(0, "cursor", "A blob pointing to an instance of a new TreeCursor.")
    .Arg(
        "fields",
        "A list of strings each one representing a field of the dataset.");

OPERATOR_SCHEMA(ResetCursor)
    .NumInputs(1)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Resets the offsets for the given TreeCursor. This operation is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.");

OPERATOR_SCHEMA(ReadNextBatch)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Read the next batch of examples out of the given cursor and data blobs.

Input(0) is a blob pointing to a TreeCursor, and
[Input(1),... Input(num_fields)] a list of tensors containing the data for
each field of the dataset.

ReadNextBatch is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.")
    .Input(1, "dataset_field_0", "First dataset field")
    .Output(0, "field_0", "Tensor containing the next batch for field 0.")
    .Arg("batch_size", "Number of top-level entries to read.");

OPERATOR_SCHEMA(ComputeOffset)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Compute the offsets matrix given cursor and data blobs. Needs to be run at
the beginning or after resetting the cursor.

Input(0) is a blob pointing to a TreeCursor, and
[Input(1),... Input(num_fields)] a list of tensors containing the data for
each field of the dataset.

ComputeOffset is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.")
    .Input(1, "dataset_field_0", "First dataset field")
    .Output(0, "field_0", "Tensor containing offset info for this chunk.");

OPERATOR_SCHEMA(ReadRandomBatch)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Read the next batch of examples out of the given cursor,
idx blob, offset matrix and data blobs.

Input(0) is a blob pointing to a TreeCursor,
Input(1) is a blob pointing to the shuffled idx
Input(2) is a blob pointing to the offset matrix and
[Input(3),... Input(num_fields)] a list of tensors containing the data for
each field of the dataset.

ReadRandomBatch is thread safe.
)DOC")
    .Input(0, "cursor", "A blob containing a pointer to the cursor.")
    .Input(1, "idx", "idx with a shuffled order.")
    .Input(2, "offsetsmat", "offset matrix containing length offset info.")
    .Input(3, "dataset_field_0", "First dataset field")
    .Output(0, "field_0", "Tensor containing the next batch for field 0.")
    .Arg("batch_size", "Number of top-level entries to read.");

OPERATOR_SCHEMA(CheckDatasetConsistency)
    .NumInputs(1, INT_MAX)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Checks that the given data fields represent a consistent dataset under
the schema specified by the `fields` argument. Operator fails if the fields
are not consistent. If data is consistent, each field's data can be safely
appended to an existing dataset, keeping it consistent.
)DOC")
    .Input(0, "field_0", "Data for field 0.")
    .Arg(
        "fields",
        "List of strings representing the string names in the format "
        "specified in the doc for CreateTreeCursor.");

OPERATOR_SCHEMA(Append)
    .NumInputs(2)
    .NumOutputs(1)
    .EnforceInplace({{0, 0}})
    .SetDoc(R"DOC(
Append input 2 to the end of input 1.
Input 1 must be the same as output, that is, it is required to be in-place.
Input 1 may have to be re-allocated in order to accommodate the new size.
Currently, an exponential growth ratio is used in order to ensure amortized
constant time complexity.
All except the outer-most dimension must be the same between input 1 and 2.
)DOC")
    .Input(0, "dataset", "The tensor to be appended to.")
    .Input(1, "new_data", "Tensor to append to the end of dataset.")
    .Output(0, "dataset", "Same as input 0, representing the mutated tensor.");

OPERATOR_SCHEMA(AtomicAppend)
    .NumInputs(3, INT_MAX)
    .NumOutputs(1, INT_MAX)
    // Input 0 is the mutex, so data input i + 1 is appended in place into
    // output i.
    .AllowInplace([](int in, int out) { return in == out + 1; });

SHOULD_NOT_DO_GRADIENT(CreateTreeCursor);
SHOULD_NOT_DO_GRADIENT(ResetCursor);
SHOULD_NOT_DO_GRADIENT(ReadNextBatch);
SHOULD_NOT_DO_GRADIENT(ComputeOffset);
SHOULD_NOT_DO_GRADIENT(ReadRandomBatch);
SHOULD_NOT_DO_GRADIENT(CheckDatasetConsistency);
SHOULD_NOT_DO_GRADIENT(Append);
SHOULD_NOT_DO_GRADIENT(AtomicAppend);
}  // namespace
}  // namespace caffe2
@ -2,16 +2,16 @@
#define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"
#include "caffe2/core/logging.h"

namespace caffe2 {

using NumericTypes = TensorTypes<int32_t, int64_t, float, double>;
class SameTypeAsInput {};

template<typename OutputTemplate, typename InputType>
template <typename OutputTemplate, typename InputType>
struct TypeForOutput {
  using value = OutputTemplate;
};
@ -21,12 +21,26 @@ struct TypeForOutput<SameTypeAsInput, InputType> {
  using value = InputType;
};

template <typename InputTypes, class Context, class Functor,
/**
 * Generic meta-operator that is able to process element-wise operations on
 * a single input tensor, returning a tensor with the same shape, and either
 * of the same type as the input or of a specified result type.
 *
 * The functor provided must implement operator() as a template on input and
 * output types, and on a Context. Moreover, it needs to provide a constructor
 * that takes OperatorBase& as argument. This is in order to consume arguments
 * passed to the operator instance.
 */
template <
    typename InputTypes,
    class Context,
    class Functor,
    class OutputType = SameTypeAsInput>
class UnaryElementwiseOp : public Operator<Context> {
class UnaryElementwiseWithArgsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(UnaryElementwiseOp);
  UnaryElementwiseWithArgsOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws), functor(*this) {}

  bool RunOnDevice() override {
    return DispatchHelper<InputTypes>::call(this, Input(0));

@ -38,11 +52,67 @@ class UnaryElementwiseOp : public Operator<Context> {
    auto* output = Output(0);
    output->ResizeLike(input);
    using R = typename TypeForOutput<OutputType, T>::value;
    Functor()(input.size(), input.template data<T>(),
        output->template mutable_data<R>(), &context_);
    functor(
        input.size(),
        input.template data<T>(),
        output->template mutable_data<R>(),
        &context_);
    return true;
  }

  Functor functor;
};
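A hypothetical functor satisfying the contract described above (the name ScaleFunctor and the "scale" argument are illustrative, not part of this diff): the constructor consumes an operator argument, and operator() is templated on input/output types and Context.

// Illustrative sketch only; it would live next to the operator definitions.
struct ScaleFunctor {
  explicit ScaleFunctor(OperatorBase& op)
      : scale_(op.GetSingleArgument<float>("scale", 1.0f)) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* /*context*/) {
    for (int i = 0; i < n; ++i) {
      out[i] = static_cast<Out>(in[i] * scale_);  // apply the "scale" arg
    }
  }

  float scale_;
};

// Hypothetical instantiation:
// using ScaleOp =
//     UnaryElementwiseWithArgsOp<TensorTypes<float>, CPUContext, ScaleFunctor>;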

/**
 * WithDefaultConstructor is a functor that can be used as the functor of a
 * UnaryElementwiseWithArgsOp. It simply forwards the operator() call into
 * another functor that doesn't accept arguments in its constructor.
 */
template <typename Functor>
struct WithDefaultConstructor {
  explicit WithDefaultConstructor(OperatorBase& op) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* c) {
    Functor()(n, in, out, c);
  }
};

/**
 * UnaryElementwiseOp is a wrapper around UnaryElementwiseWithArgsOp, with the
 * difference that it takes a functor with default constructor, e.g. one that
 * does not need to take into consideration any arguments during operator
 * creation.
 */
template <
    typename InputTypes,
    class Context,
    class Functor,
    class OutputType = SameTypeAsInput>
using UnaryElementwiseOp = UnaryElementwiseWithArgsOp<
    InputTypes,
    Context,
    WithDefaultConstructor<Functor>,
    OutputType>;

/**
 * ForEach is a unary functor that forwards each element of the input array
 * into the elementwise Functor provided, and gathers the results of each
 * call into the resulting array. Use it as an adaptor if you want to create
 * a UnaryElementwiseOp that acts on each element of the tensor per function
 * call -- this is reasonable for complex types where vectorization wouldn't
 * be much of a gain, performance-wise.
 */
template <typename Functor>
struct ForEach {
  explicit ForEach(OperatorBase& op) : functor(op) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* c) {
    for (int i = 0; i < n; ++i) {
      out[i] = functor(in[i]);
    }
  }
  Functor functor;
};
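The adaptor pattern in isolation (a standalone sketch, independent of the Caffe2 types; ForEachSketch and StrLen are illustrative names): a per-element functor is lifted to an array functor, here mapping strings to their lengths, which is exactly the non-vectorizable, type-changing case ForEach targets.

#include <iostream>
#include <string>
#include <vector>

template <typename Functor>
struct ForEachSketch {
  Functor functor;
  template <typename In, typename Out>
  void operator()(int n, const In* in, Out* out) {
    for (int i = 0; i < n; ++i) {
      out[i] = functor(in[i]);  // one functor call per element
    }
  }
};

struct StrLen {
  int operator()(const std::string& s) { return (int)s.size(); }
};

int main() {
  std::vector<std::string> in = {"alex", "bob"};
  std::vector<int> out(in.size());
  ForEachSketch<StrLen>{}((int)in.size(), in.data(), out.data());
  std::cout << out[0] << " " << out[1] << "\n";  // prints 4 3
}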

/**

@ -113,7 +183,7 @@ class BinaryElementwiseOp : public Operator<Context> {
      return false;
    }
    for (int i = 0; i < b.size(); ++i) {
      if (a[a.size()-1-i] != b[b.size()-1-i]) {
      if (a[a.size() - 1 - i] != b[b.size() - 1 - i]) {
        return false;
      }
    }
@ -192,8 +262,12 @@ class DivGradientOp final : public Operator<Context> {
    }                                                    \
  };                                                     \
  template <class DC>                                    \
  using name##Op = BinaryElementwiseOp<NumericTypes, DC, \
      name##Functor, SameTypeAsInput, true>
  using name##Op = BinaryElementwiseOp<                  \
      NumericTypes,                                      \
      DC,                                                \
      name##Functor,                                     \
      SameTypeAsInput,                                   \
      true>

CAFFE2_BINARY_FUNCTOR_WRAPPER(Add);
CAFFE2_BINARY_FUNCTOR_WRAPPER(Sub);
@ -225,8 +299,8 @@ CAFFE2_BINARY_FUNCTOR_WRAPPER(Div);
    }                                                    \
  };                                                     \
  template <class DC>                                    \
  using name##Op = BinaryElementwiseOp<                  \
      NumericTypes, DC, name##Functor, bool, true>
  using name##Op =                                       \
      BinaryElementwiseOp<NumericTypes, DC, name##Functor, bool, true>

CAFFE2_BINARY_FUNCTOR_BINARY_RESULT_WRAPPER(LT);
CAFFE2_BINARY_FUNCTOR_BINARY_RESULT_WRAPPER(LE);
@ -18,6 +18,7 @@ REGISTER_CPU_OPERATOR(UniformFill, UniformFillOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(UniformIntFill, UniformFillOp<int, CPUContext>);
REGISTER_CPU_OPERATOR(ConstantFill, ConstantFillOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(ConstantIntFill, ConstantFillOp<int, CPUContext>);
REGISTER_CPU_OPERATOR(ConstantBoolFill, ConstantFillOp<bool, CPUContext>);
REGISTER_CPU_OPERATOR(GivenTensorFill, GivenTensorFillOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(GivenTensorIntFill, GivenTensorFillOp<int, CPUContext>);
REGISTER_CPU_OPERATOR(GaussianFill, GaussianFillOp<float, CPUContext>);

@ -30,6 +31,10 @@ OPERATOR_SCHEMA(UniformFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}}
OPERATOR_SCHEMA(UniformIntFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
OPERATOR_SCHEMA(ConstantFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
OPERATOR_SCHEMA(ConstantIntFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
OPERATOR_SCHEMA(ConstantBoolFill)
    .NumInputs(0, 1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}});
OPERATOR_SCHEMA(GivenTensorFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
OPERATOR_SCHEMA(GivenTensorIntFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
OPERATOR_SCHEMA(GaussianFill).NumInputs(0, 1).NumOutputs(1).AllowInplace({{0, 0}});
@ -56,14 +56,12 @@ class FullyConnectedOp final : public Operator<Context> {
    CAFFE_ENFORCE(N == b.dim32(0), dimErrorString());
    CAFFE_ENFORCE(N == b.size(), dimErrorString());

    // Create the Y shape (without allocation)
    static thread_local vector<TIndex> Y_shape;
    Y_shape = X.dims();
    Y_shape_cache_ = X.dims();
    // This is an invariant of canonical_axis, so we can DCHECK.
    DCHECK_LE(canonical_axis + 1, Y_shape.size());
    Y_shape.resize(canonical_axis + 1);
    Y_shape[canonical_axis] = N;
    Y->Resize(Y_shape);
    DCHECK_LE(canonical_axis + 1, Y_shape_cache_.size());
    Y_shape_cache_.resize(canonical_axis + 1);
    Y_shape_cache_[canonical_axis] = N;
    Y->Resize(Y_shape_cache_);
    CAFFE_ENFORCE(M * N == Y->size(), dimErrorString());

    // W * x

@ -88,6 +86,9 @@ class FullyConnectedOp final : public Operator<Context> {

 protected:
  size_t axis_{1};
  // A local vector to cache the output shape so we don't need to recreate
  // a vector object every time we run Run().
  vector<TIndex> Y_shape_cache_;
  Tensor<Context> bias_multiplier_;
};
@ -7,7 +7,6 @@
#include <limits>

namespace caffe2 {

namespace {
using IndexKeyTypes = TensorTypes<int32_t, int64_t, std::string>;
using TIndexValue = int64_t;
@ -22,12 +21,17 @@ struct IndexBase {
  void Freeze() { frozen_ = true; }
  virtual ~IndexBase() {}
  const TypeMeta& Type() const { return meta_; }
  TIndexValue Size() {
    std::lock_guard<std::mutex> guard(dictMutex_);
    return nextId_;
  }

 protected:
  int64_t maxElements_;
  TypeMeta meta_;
  TIndexValue nextId_{1}; // guarded by dictMutex_
  std::atomic<bool> frozen_{false};
  std::mutex dictMutex_;
};

template<typename T>
@ -96,7 +100,6 @@ struct Index: IndexBase {
  }

  std::unordered_map<T, TIndexValue> dict_;
  std::mutex dictMutex_;
};

template<class T>
@ -142,7 +145,9 @@ class IndexGetOp: public Operator<CPUContext> {
class IndexLoadOp: public Operator<CPUContext> {
 public:
  IndexLoadOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws) {}
      : Operator(operator_def, ws),
        skipFirstEntry_(
            OperatorBase::GetSingleArgument<int>("skip_first_entry", 0)) {}

  bool RunOnDevice() override {
    return DispatchHelper<IndexKeyTypes>::call(this, Input(1));

@ -153,8 +158,18 @@ class IndexLoadOp: public Operator<CPUContext> {
    auto* dict = dynamic_cast_if_rtti<Index<T>*>(base.get());
    CAFFE_ENFORCE(dict, "Wrong dictionary type given input keys.");
    const auto& keys = Input(1);
    return dict->Load(keys.data<T>(), keys.size());
    const auto* keys_data = keys.data<T>();
    auto keys_size = keys.size();
    if (skipFirstEntry_) {
      CAFFE_ENFORCE(keys.size() > 0);
      ++keys_data;
      --keys_size;
    }
    return dict->Load(keys_data, keys_size);
  }

 private:
  bool skipFirstEntry_;
};

class IndexStoreOp: public Operator<CPUContext> {
@ -188,6 +203,19 @@ class IndexFreezeOp: public Operator<CPUContext> {
  }
};

class IndexSizeOp : public Operator<CPUContext> {
 public:
  IndexSizeOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws) {}

  bool RunOnDevice() override {
    auto& base = OperatorBase::Input<std::unique_ptr<IndexBase>>(0);
    auto* out = Output(0);
    out->Resize(std::vector<TIndex>{});
    *out->mutable_data<TIndexValue>() = base->Size();
    return true;
  }
};

REGISTER_CPU_OPERATOR(IntIndexCreate, IndexCreateOp<int32_t>);
REGISTER_CPU_OPERATOR(LongIndexCreate, IndexCreateOp<int64_t>);
@ -197,6 +225,7 @@ REGISTER_CPU_OPERATOR(IndexGet, IndexGetOp);
REGISTER_CPU_OPERATOR(IndexLoad, IndexLoadOp);
REGISTER_CPU_OPERATOR(IndexStore, IndexStoreOp);
REGISTER_CPU_OPERATOR(IndexFreeze, IndexFreezeOp);
REGISTER_CPU_OPERATOR(IndexSize, IndexSizeOp);

OPERATOR_SCHEMA(IntIndexCreate)
    .NumInputs(0)

@ -250,7 +279,6 @@ Should not be called concurrently with IndexGet.
)DOC")
    .Input(0, "handle", "Pointer to an Index instance.");

OPERATOR_SCHEMA(IndexLoad)
    .NumInputs(2)
    .NumOutputs(0)
@ -259,7 +287,12 @@ Loads the index from the given 1-D tensor. Elements in the tensor will be given
consecutive indexes starting at 1. Fails if tensor contains repeated elements.
)DOC")
    .Input(0, "handle", "Pointer to an Index instance.")
    .Input(1, "items", "1-D tensor with elements starting with index 1.");
    .Input(1, "items", "1-D tensor with elements starting with index 1.")
    .Arg(
        "skip_first_entry",
        "If set, skips the first entry of the tensor. This makes it "
        "possible to load tensors that are aligned with an embedding, "
        "where the first entry corresponds to the default 0 index entry.");

OPERATOR_SCHEMA(IndexStore)
    .NumInputs(1)

@ -271,6 +304,15 @@ for unknowns, the first element of the output tensor will be element of index 1.
    .Input(0, "handle", "Pointer to an Index instance.")
    .Output(0, "items", "1-D tensor with elements starting with index 1.");

OPERATOR_SCHEMA(IndexSize)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Returns the number of entries currently present in the index.
)DOC")
    .Input(0, "handle", "Pointer to an Index instance.")
    .Output(0, "items", "Scalar int64 tensor with number of entries.");

NO_GRADIENT(IndexGetOp);
NO_GRADIENT(IntIndexCreate);
NO_GRADIENT(LongIndexCreate);

@ -278,5 +320,5 @@ NO_GRADIENT(StringIndexCreate);
SHOULD_NOT_DO_GRADIENT(IndexFreeze);
SHOULD_NOT_DO_GRADIENT(IndexLoad);
SHOULD_NOT_DO_GRADIENT(IndexStore);

SHOULD_NOT_DO_GRADIENT(IndexSize);
}  // namespace caffe2
@ -3,6 +3,7 @@

#include <cstdio>
#include <map>
#include <unordered_set>

#include "caffe2/core/context.h"
#include "caffe2/core/db.h"
@ -65,12 +66,18 @@ class LoadOp final : public Operator<Context> {
    // chunks. This way we can make sure that all chunks were loaded in the end.
    // This is a map from output index to current size of the blob
    std::map<int, size_t> blobSizes;

    std::unordered_set<string> loaded;
    for (; cursor->Valid(); cursor->Next()) {
      const string& key = cursor->key();
      if (!output_indices_.count(key)) {
        VLOG(1) << "Key " << key << " not used. Skipping.";
      } else {
        CAFFE_ENFORCE(
            loaded.count(key) == 0,
            "Multiple copies of blob ",
            key,
            " found in the db.");

        VLOG(2) << "Deserializing blob " << key;
        BlobProto proto;
        CHECK(proto.ParseFromString(cursor->value()));
@ -101,6 +108,15 @@ class LoadOp final : public Operator<Context> {
          blobSize.first->second = blob->Get<Tensor<Context>>().size();
        }
      }

      if (!proto.has_tensor() ||
          blobSize.first->second >= blob->Get<Tensor<Context>>().size()) {
        loaded.insert(key);
      }

      if (loaded.size() >= OutputSize()) {
        break;
      }
    }
  }
@ -116,6 +132,8 @@ class LoadOp final : public Operator<Context> {
          blobSize.second);
      }
    }

    CHECK_EQ(loaded.size(), OutputSize());
  }

 private:
caffe2/operators/one_hot_ops.cc (new file, 107 lines)

@ -0,0 +1,107 @@
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/tensor.h"
|
||||
|
||||
namespace caffe2 {
|
||||
namespace {
|
||||
|
||||
class OneHotOp : public Operator<CPUContext> {
|
||||
public:
|
||||
OneHotOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator(operator_def, ws) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
auto& indices = Input(0);
|
||||
auto& index_size_tensor = Input(1);
|
||||
CAFFE_ENFORCE(indices.ndim() == 1);
|
||||
CAFFE_ENFORCE(index_size_tensor.size() == 1);
|
||||
auto batch_size = indices.size();
|
||||
auto index_size = *index_size_tensor.data<int64_t>();
|
||||
|
||||
auto* indices_ptr = indices.data<int64_t>();
|
||||
auto* one_hots = Output(0);
|
||||
one_hots->Resize(std::vector<TIndex>{batch_size, index_size});
|
||||
if (one_hots->size() == 0) {
|
||||
return true;
|
||||
}
|
||||
auto* one_hots_ptr = one_hots->mutable_data<float>();
|
||||
memset(one_hots_ptr, 0, one_hots->nbytes());
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
auto label_idx = indices_ptr[i];
|
||||
DCHECK((0 <= label_idx) && (label_idx < index_size));
|
||||
one_hots_ptr[label_idx] = 1.0;
|
||||
one_hots_ptr += index_size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class SegmentOneHotOp : public Operator<CPUContext> {
|
||||
public:
|
||||
SegmentOneHotOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator(operator_def, ws) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
auto& lengths = Input(0);
|
||||
auto& indices = Input(1);
|
||||
auto& index_size_tensor = Input(2);
|
||||
CAFFE_ENFORCE(lengths.ndim() == 1);
|
||||
CAFFE_ENFORCE(indices.ndim() == 1);
|
||||
CAFFE_ENFORCE(index_size_tensor.size() == 1);
|
||||
auto batch_size = lengths.size();
|
||||
auto index_size = *index_size_tensor.data<int64_t>();
|
||||
CAFFE_ENFORCE(index_size > 0);
|
||||
|
||||
auto* lengths_ptr = lengths.data<int32_t>();
|
||||
auto* indices_ptr = indices.data<int64_t>();
|
||||
auto* one_hots = Output(0);
|
||||
one_hots->Resize(std::vector<TIndex>{batch_size, index_size});
|
||||
auto* one_hots_ptr = one_hots->mutable_data<float>();
|
||||
if (one_hots->size() == 0) {
|
||||
return true;
|
||||
}
|
||||
memset(one_hots_ptr, 0, one_hots->nbytes());
|
||||
int el_idx = 0;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
for (int j = 0; j < lengths_ptr[i]; ++j) {
|
||||
DCHECK(el_idx < indices.size());
|
||||
auto label_idx = indices_ptr[el_idx++];
|
||||
DCHECK((0 <= label_idx) && (label_idx < index_size));
|
||||
one_hots_ptr[label_idx] = 1.0;
|
||||
}
|
||||
one_hots_ptr += index_size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_CPU_OPERATOR(OneHot, OneHotOp);
REGISTER_CPU_OPERATOR(SegmentOneHot, SegmentOneHotOp);

OPERATOR_SCHEMA(OneHot)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a sequence of indices, one for each example in a batch, returns a matrix
where each inner dimension has the size of the index and has 1.0 in the index
active in the given example, and 0.0 everywhere else.
)DOC")
    .Input(0, "indices", "The active index for each example in the batch.")
    .Input(1, "index_size_tensor", "Scalar with the size of the index.")
    .Output(0, "one_hots", "Matrix of size len(indices) x index_size");

OPERATOR_SCHEMA(SegmentOneHot)
    .NumInputs(3)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a sequence of indices, segmented by the lengths tensor, returns a matrix
that has the elements in each sequence set to 1.0, and 0.0 everywhere else.
)DOC")
    .Input(0, "lengths", "Size of each segment.")
    .Input(1, "indices", "Active indices, of size sum(lengths)")
    .Input(2, "index_size_tensor", "Size of the index")
    .Output(0, "one_hots", "Matrix of size len(lengths) x index_size");

NO_GRADIENT(OneHot);
NO_GRADIENT(SegmentOneHot);
}  // namespace
}  // namespace caffe2
@ -124,6 +124,67 @@ struct LogSumExpRangeReducerDef {
      "input slices. Operation doesn't change the shape of individual blocks.";
};

template <typename T, class Context>
class LogMeanExpRangeReducer;
template <typename T, class Context>
class LogMeanExpRangeReducerGradient;

template <typename T>
class LogMeanExpRangeReducer<T, CPUContext> {
 public:
  void operator()(
      const TIndex block_size,
      const TIndex blocks,
      const T* in,
      T* out,
      CPUContext* context) {
    for (int j = 0; j < block_size; ++j) {
      T max_value = std::numeric_limits<T>::lowest();
      for (int i = 0; i < blocks; ++i) {
        max_value = std::max(max_value, in[i * block_size + j]);
      }
      T scaled_exp_sum = 0;
      for (int i = 0; i < blocks; ++i) {
        scaled_exp_sum += std::exp(in[i * block_size + j] - max_value);
      }
      scaled_exp_sum /= blocks;
      *(out++) = std::log(scaled_exp_sum) + max_value;
    }
  }
};

template <typename T, class Context>
class LogMeanExpRangeReducerGradient {
 public:
  void operator()(
      const TIndex block_size,
      const TIndex blocks,
      const T* segment_grad, // GO
      T* data_grad, // GI
      const T* data_in, // I
      const T* data_out, // O
      Context* context) {
    for (int j = 0; j < block_size; ++j) {
      const T out_grad = *(segment_grad++);
      const T offset = *(data_out++);
      for (int i = 0; i < blocks; ++i) {
        auto idx = i * block_size + j;
        data_grad[idx] = out_grad * std::exp(data_in[idx] - offset) / blocks;
      }
    }
  }
};

struct LogMeanExpRangeReducerDef {
  template <typename T, class Context>
  using Reducer = LogMeanExpRangeReducer<T, Context>;
  template <typename T, class Context>
  using ReducerGradient = LogMeanExpRangeReducerGradient<T, Context>;
  static constexpr const char* name = "LogMeanExp";
  static constexpr const char* doc =
      "LogMeanExp computes the element-wise log of the mean of exponentials "
      "of input slices. Operation doesn't change the shape of individual "
      "blocks.";
};

template <typename T, class Context>
class MeanRangeReducer;
|
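
The reducer uses the standard max-shift for numerical stability: log(mean(exp(x))) = max(x) + log(mean(exp(x - max(x)))). A numpy sketch of the same reduction over the segment axis (illustrative, not from the commit):

import numpy as np

def log_mean_exp(x):
    # x: (blocks, block_size); reduce over blocks, as the CPU reducer does.
    m = x.max(axis=0)
    return m + np.log(np.mean(np.exp(x - m), axis=0))

x = np.random.randn(4, 3)
assert np.allclose(log_mean_exp(x), np.log(np.exp(x).mean(axis=0)))
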
@ -180,10 +241,78 @@ struct MeanRangeReducerDef {
  static constexpr const char* doc =
      "Mean computation is done element-wise, so that each element of the "
      "output slice corresponds to the average value of the respective "
      "elements in the input slives. Operation doesn't change the shape of "
      "elements in the input slices. Operation doesn't change the shape of "
      "individual blocks.";
};

template <typename T, class Context>
class MaxRangeReducer;
template <typename T, class Context>
class MaxRangeReducerGradient;

template <typename T>
class MaxRangeReducer<T, CPUContext> {
 public:
  void operator()(
      const TIndex block_size,
      const TIndex blocks,
      const T* in,
      T* out,
      CPUContext* context) {
    for (int j = 0; j < block_size; ++j) {
      T max_value = std::numeric_limits<T>::lowest();
      for (int i = 0; i < blocks; ++i) {
        max_value = std::max(max_value, in[i * block_size + j]);
      }
      *(out++) = max_value;
    }
  }
};

template <typename T, class Context>
class MaxRangeReducerGradient {
 public:
  void operator()(
      const TIndex block_size,
      const TIndex blocks,
      const T* segment_grad, // GO
      T* data_grad, // GI
      const T* data_in, // I
      const T* data_out, // O
      Context* context) {
    std::memset(
        static_cast<void*>(data_grad), 0, blocks * block_size * sizeof(T));
    for (int j = 0; j < block_size; ++j) {
      const T out_grad = *(segment_grad++);
      const T out = data_out[j];
      for (int i = 0; i < blocks; ++i) {
        auto idx = i * block_size + j;
        if (out == data_in[idx]) {
          data_grad[idx] = out_grad;
          break;
        }
      }
    }
  }
};

struct MaxRangeReducerDef {
  template <typename T, class Context>
  using Reducer = MaxRangeReducer<T, Context>;
  template <typename T, class Context>
  using ReducerGradient = MaxRangeReducerGradient<T, Context>;
  static constexpr const char* name = "Max";
  static constexpr const char* doc =
      "Max computation is done element-wise, so that each element of the "
      "output slice corresponds to the max value of the respective "
      "elements in the input slices. Operation doesn't change the shape of "
      "individual blocks. This implementation imitates torch nn.Max operator. "
      "If the maximum value occurs more than once, the operator will return "
      "the first occurrence of the value. When computing the gradient using "
      "the backward propagation, the gradient input corresponding to the "
      "first occurrence of the maximum value will be used.";
};

////////////////////////////////////////////////////////////////////////////////
// Incremental reducers: consume elements one by one
////////////////////////////////////////////////////////////////////////////////
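
The gradient routes each output gradient entirely to the first block that attains the maximum, mirroring torch's nn.Max. In numpy terms (a sketch, not from the commit):

import numpy as np

def max_reducer_grad(x, out_grad):
    # x: (blocks, block_size); out_grad: (block_size,)
    grad = np.zeros_like(x)
    first = np.argmax(x == x.max(axis=0), axis=0)  # first occurrence per column
    grad[first, np.arange(x.shape[1])] = out_grad
    return grad
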
@ -32,12 +32,16 @@ class CuDNNReluOp final : public Operator<CUDAContext> {
    if (X.dims() != cudnn_input_dims_) {
      VLOG(1) << "Setting descriptors.";
      cudnn_input_dims_ = X.dims();
      int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(3));
      int H = 1;
      int W = 1;
      int C = 1, H = 1, W = 1;
      if (X.ndim() == 4) {
        // Normal 4-dimensional tensors for images.
        C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(3));
        H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
        W = (order_ == StorageOrder::NCHW ? X.dim32(3) : X.dim32(2));
      } else {
        // If X is not 4-dimensional, we will simply use H = 1 and W = 1
        // and wrap everything into C.
        C = X.size() / X.dim32(0);
      }
      CUDNN_CHECK(cudnnSetTensor4dDescriptor(
          data_desc_, GetCudnnTensorFormat(order_),

@ -93,12 +97,16 @@ class CuDNNReluGradientOp final : public Operator<CUDAContext> {
    if (Y.dims() != cudnn_input_dims_) {
      VLOG(1) << "Setting descriptors.";
      cudnn_input_dims_ = Y.dims();
      int C = (order_ == StorageOrder::NCHW ? Y.dim32(1) : Y.dim32(3));
      int H = 1;
      int W = 1;
      int C = 1, H = 1, W = 1;
      if (Y.ndim() == 4) {
        // Normal 4-dimensional tensors for images.
        C = (order_ == StorageOrder::NCHW ? Y.dim32(1) : Y.dim32(3));
        H = (order_ == StorageOrder::NCHW ? Y.dim32(2) : Y.dim32(1));
        W = (order_ == StorageOrder::NCHW ? Y.dim32(3) : Y.dim32(2));
      } else {
        // If Y is not 4-dimensional, we will simply use H = 1 and W = 1
        // and wrap everything into C.
        C = Y.size() / Y.dim32(0);
      }
      CUDNN_CHECK(cudnnSetTensor4dDescriptor(
          data_desc_, GetCudnnTensorFormat(order_),
caffe2/operators/reverse_packed_segs_op.cc (new file, 35 lines)
@ -0,0 +1,35 @@
#include "caffe2/operators/reverse_packed_segs_op.h"

namespace caffe2 {
namespace {
REGISTER_CPU_OPERATOR(ReversePackedSegs, ReversePackedSegsOp<CPUContext>);

OPERATOR_SCHEMA(ReversePackedSegs)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Reverse segments in a 3-D tensor (lengths, segments, embeddings,), leaving
paddings unchanged. This operator is used to reverse input of a recurrent neural
network to make it a BRNN.
)DOC")
    .Input(0, "data", "a 3-D (lengths, segments, embeddings,) tensor.")
    .Input(1, "lengths", "length of each segment.")
    .Output(
        0,
        "reversed data",
        "a (lengths, segments, embeddings,) tensor with each segment reversed "
        "and paddings unchanged.");

class GetReversePackedSegsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ReversePackedSegs",
        "",
        vector<string>{GO(0), I(1)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(ReversePackedSegs, GetReversePackedSegsGradient);
} // namespace
} // namespace caffe2
caffe2/operators/reverse_packed_segs_op.h (new file, 84 lines)
@ -0,0 +1,84 @@
#ifndef CAFFE2_OPERATORS_REVERSE_PACKED_SEGS_OP_H_
#define CAFFE2_OPERATORS_REVERSE_PACKED_SEGS_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

namespace caffe2 {

template <class Context>
class ReversePackedSegsOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ReversePackedSegsOp);
  USE_DISPATCH_HELPER;

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, double, int, long, bool>>::call(
        this, Input(DATA));
  }

  template <typename T>
  bool DoRunWithType() {
    if (Input(LENGTHS).template IsType<int>()) {
      DoRunWithLengthType<T, int>();
    } else {
      DoRunWithLengthType<T, long>();
    }
    return true;
  }

 private:
  INPUT_TAGS(DATA, LENGTHS);

  template <typename T, typename LengthType>
  void DoRunWithLengthType() {
    const auto& data = Input(DATA);
    const auto& lengths = Input(LENGTHS);

    CAFFE_ENFORCE(
        data.ndim() == 3,
        "DATA should be 3-D tensor <lengths, "
        "segments, embeddings>");
    CAFFE_ENFORCE(lengths.ndim() == 1, "LENGTH should be 1-D");

    auto* output = Output(0);
    const auto& shape = data.dims();
    output->Resize(shape);

    const auto& max_length = data.dims()[0];
    const auto& batch_size = data.dims()[1];
    const auto& block_size = data.dims()[2];
    CAFFE_ENFORCE(
        lengths.dims()[0] == batch_size,
        "lengths size should be"
        " equal to batch size");

    const T* data_ptr = data.template data<T>();
    const LengthType* lengths_ptr = lengths.template data<LengthType>();
    T* rev_data_ptr = output->template mutable_data<T>();
    for (TIndex i = 0; i < batch_size; i++) {
      const auto& seg_length = lengths_ptr[i];
      CHECK_LE(seg_length, max_length);
      TIndex j = 0;
      for (; j < seg_length; j++) {
        const T* data_block_ptr = data_ptr + (j * batch_size + i) * block_size;
        T* rev_data_block_ptr =
            rev_data_ptr + ((seg_length - 1 - j) * batch_size + i) * block_size;
        context_.template Copy<T, Context, Context>(
            block_size, data_block_ptr, rev_data_block_ptr);
      }
      for (; j < max_length; j++) {
        const T* data_block_ptr = data_ptr + (j * batch_size + i) * block_size;
        T* rev_data_block_ptr =
            rev_data_ptr + (j * batch_size + i) * block_size;
        context_.template Copy<T, Context, Context>(
            block_size, data_block_ptr, rev_data_block_ptr);
      }
    }
  }
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_REVERSE_PACKED_SEGS_OP_H_
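
As a rough illustration of the semantics (blob names hypothetical): with max_length 3, batch_size 2 and block_size 1, a sequence of length 2 reverses its two valid steps and keeps its padding step in place:

from caffe2.python import core, workspace
import numpy as np

data = np.arange(6, dtype=np.float32).reshape(3, 2, 1)  # (lengths, segments, embeddings)
lengths = np.array([3, 2], dtype=np.int32)
workspace.FeedBlob('data', data)
workspace.FeedBlob('lengths', lengths)
workspace.RunOperatorOnce(
    core.CreateOperator('ReversePackedSegs', ['data', 'lengths'], ['reversed']))
# Sequence 0 ([0, 2, 4]) becomes [4, 2, 0]; sequence 1 ([1, 3, 5]) becomes
# [3, 1, 5], its padding step 5 untouched.
print(workspace.FetchBlob('reversed'))
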
@ -1066,9 +1066,15 @@ REGISTER_SEGMENT_DEF(
REGISTER_SEGMENT_DEF(
    AbstractSortedSegmentRangeDef<float, int, CPUContext,
                                  LogSumExpRangeReducerDef>);
REGISTER_SEGMENT_DEF(AbstractSortedSegmentRangeDef<
                     float,
                     int,
                     CPUContext,
                     LogMeanExpRangeReducerDef>);
REGISTER_SEGMENT_DEF(
    AbstractSortedSegmentRangeDef<float, int, CPUContext,
                                  MeanRangeReducerDef>);
    AbstractSortedSegmentRangeDef<float, int, CPUContext, MeanRangeReducerDef>);
REGISTER_SEGMENT_DEF(
    AbstractSortedSegmentRangeDef<float, int, CPUContext, MaxRangeReducerDef>);

#define REGISTER_REDUCER_WITH_ALL_OPS(reducer_def) \
  REGISTER_SEGMENT_DEF( \
caffe2/operators/sparse_to_dense_mask_op.cc (new file, 152 lines)
@ -0,0 +1,152 @@
#include <algorithm>
#include <unordered_map>
#include <vector>
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"

namespace caffe2 {

using ValueTypes = TensorTypes<int32_t, int64_t, float, double, string, bool>;

class SparseToDenseMaskOp : public Operator<CPUContext> {
 public:
  SparseToDenseMaskOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<CPUContext>(operator_def, ws) {
    std::vector<int> mask = GetRepeatedArgument<int>("mask");
    featuresCount_ = mask.size();
    auto biggest = *std::max_element(mask.begin(), mask.end());
    dense_.assign(std::min(kMaxDenseSize, biggest + 1), -1);
    for (int i = 0; i < mask.size(); i++) {
      int id = mask[i];
      CAFFE_ENFORCE(id >= 0, "Only non-negative IDs are allowed.");
      if (id >= kMaxDenseSize) {
        sparse_[id] = i;
      } else {
        dense_[id] = i;
      }
    }
  }

  bool RunOnDevice() override {
    const TypeMeta& meta = Input(INDICES).meta();
    if (meta.Match<int32_t>()) {
      return DoRunWithIndexType<int32_t>();
    } else if (meta.Match<int64_t>()) {
      return DoRunWithIndexType<int64_t>();
    } else {
      CAFFE_THROW("Unsupported type of tensor: ", meta.name());
      return false;
    }
  }

  template <typename TInd>
  bool DoRunWithIndexType() {
    if (InputSize() < 4) {
      return DoRunWithLengthType<TInd, int32_t>();
    } else {
      const TypeMeta& meta = Input(LENGTHS).meta();
      if (meta.Match<int32_t>()) {
        return DoRunWithLengthType<TInd, int32_t>();
      } else if (meta.Match<int64_t>()) {
        return DoRunWithLengthType<TInd, int64_t>();
      } else {
        CAFFE_THROW("Unsupported type of tensor: ", meta.name());
        return false;
      }
    }
  }

  template <typename TInd, typename TLen>
  bool DoRunWithLengthType() {
    return DispatchHelper<ValueTypes, TInd, TLen>::call(this, Input(VALUES));
  }

  template <typename TInd, typename TLen, typename TVal>
  bool DoRunWithType() {
    auto& sparse_indices = Input(INDICES);
    CAFFE_ENFORCE(sparse_indices.ndim() == 1);
    auto& sparse_values = Input(VALUES);
    CAFFE_ENFORCE(sparse_values.ndim() == 1);
    CAFFE_ENFORCE(sparse_indices.size() == sparse_values.size());
    auto& default_value = Input(DEFAULT);
    CAFFE_ENFORCE(default_value.size() == 1);

    const TInd* sparse_indices_vec = sparse_indices.data<TInd>();
    const TVal* sparse_values_vec = sparse_values.template data<TVal>();
    const TVal* default_val = default_value.template data<TVal>();

    int cols = featuresCount_;
    int rows = 0;
    TLen default_length = sparse_indices.dim32(0);
    const TLen* lengths_vec = nullptr;
    auto* output = Output(0);
    if (InputSize() == 4) {
      auto& lengths = Input(LENGTHS);
      CAFFE_ENFORCE(lengths.ndim() == 1);
      lengths_vec = lengths.data<TLen>();
      rows = lengths.dim32(0);
      output->Resize(rows, cols);
    }
    if (rows == 0) {
      // if LENGTHS is not set, or is empty, the output will be a vector
      rows = 1;
      lengths_vec = &default_length;
      output->Resize(cols);
    }

    // init
    TVal* output_data = output->template mutable_data<TVal>();
    for (int i = 0; i < cols * rows; i++) {
      output_data[i] = default_val[0];
    }

    TLen offset = 0;
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < lengths_vec[r]; c++) {
        int idx = getFeatureIdx(sparse_indices_vec[offset + c]);
        if (idx != -1) {
          output_data[r * cols + idx] = sparse_values_vec[offset + c];
        }
      }
      offset += lengths_vec[r];
    }

    return true;
  }

 private:
  const int kMaxDenseSize = 1024 * 128;

  std::unordered_map<int, int> sparse_;
  std::vector<int> dense_;
  int featuresCount_;

  inline int getFeatureIdx(int id) const {
    if (id >= kMaxDenseSize) {
      const auto& iter = sparse_.find(id);
      if (iter == sparse_.end()) {
        return -1;
      } else {
        return iter->second;
      }
    } else {
      return (id >= dense_.size()) ? -1 : dense_[id];
    }
  }

  INPUT_TAGS(INDICES, VALUES, DEFAULT, LENGTHS);
};

namespace {
REGISTER_CPU_OPERATOR(SparseToDenseMask, SparseToDenseMaskOp);

OPERATOR_SCHEMA(SparseToDenseMask)
    .NumInputs(3, 4)
    .NumOutputs(1)
    .SetDoc("Convert sparse representations to dense with given indices.")
    .Output(0, "output", "1-D or 2-D dense tensor.");

NO_GRADIENT(SparseToDenseMask);
} // namespace
} // namespace caffe2
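
A hypothetical use from Python: with mask=[11, 2, 30], feature id 2 maps to column 1 and id 30 to column 2; ids outside the mask are dropped and untouched columns keep the default value (blob names illustrative):

from caffe2.python import core, workspace
import numpy as np

workspace.FeedBlob('indices', np.array([2, 30, 7], dtype=np.int32))
workspace.FeedBlob('values', np.array([1.5, 2.5, 9.9], dtype=np.float32))
workspace.FeedBlob('default', np.array([0.0], dtype=np.float32))
workspace.RunOperatorOnce(core.CreateOperator(
    'SparseToDenseMask', ['indices', 'values', 'default'], ['dense'],
    mask=[11, 2, 30]))
print(workspace.FetchBlob('dense'))  # expected: [0.0, 1.5, 2.5]; id 7 is ignored
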
caffe2/operators/string_ops.cc (new file, 124 lines)
@ -0,0 +1,124 @@
#include "caffe2/caffe2/operators/string_ops.h"
#include "caffe2/core/operator.h"

namespace caffe2 {
namespace {

struct StartsWith {
  explicit StartsWith(OperatorBase& op)
      : prefix_(op.GetSingleArgument<std::string>("prefix", "")) {}
  bool operator()(const std::string& str) {
    return std::mismatch(prefix_.begin(), prefix_.end(), str.begin()).first ==
        prefix_.end();
  }

 private:
  std::string prefix_;
};

struct EndsWith {
  explicit EndsWith(OperatorBase& op)
      : suffix_(op.GetSingleArgument<std::string>("suffix", "")) {}
  bool operator()(const std::string& str) {
    return std::mismatch(suffix_.rbegin(), suffix_.rend(), str.rbegin())
        .first == suffix_.rend();
  }

 private:
  std::string suffix_;
};

struct Prefix {
  explicit Prefix(OperatorBase& op)
      : length_(op.GetSingleArgument<int>("length", 3)) {}
  std::string operator()(const std::string& str) {
    return std::string(str.begin(), std::min(str.end(), str.begin() + length_));
  }

 private:
  int length_;
};

struct Suffix {
  explicit Suffix(OperatorBase& op)
      : length_(op.GetSingleArgument<int>("length", 3)) {}
  std::string operator()(const std::string& str) {
    return std::string(std::max(str.begin(), str.end() - length_), str.end());
  }

 private:
  int length_;
};

template <typename ScalarFunctor, typename OutputType = std::string>
using StringElementwiseOp = UnaryElementwiseWithArgsOp<
    TensorTypes<std::string>,
    CPUContext,
    ForEach<ScalarFunctor>,
    OutputType>;

REGISTER_CPU_OPERATOR(StringPrefix, StringElementwiseOp<Prefix>);
REGISTER_CPU_OPERATOR(StringSuffix, StringElementwiseOp<Suffix>);
REGISTER_CPU_OPERATOR(StringStartsWith, StringElementwiseOp<StartsWith, bool>);
REGISTER_CPU_OPERATOR(StringEndsWith, StringElementwiseOp<EndsWith, bool>);

OPERATOR_SCHEMA(StringPrefix)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the element-wise string prefix of the string tensor.
Input strings that are shorter than prefix length will be returned unchanged.
NOTE: Prefix is computed on number of bytes, which may lead to wrong behavior
and potentially invalid strings for variable-length encodings such as utf-8.
)DOC")
    .Arg("length", "Maximum size of the prefix, in bytes.")
    .Input(0, "strings", "Tensor of std::string.")
    .Output(
        0,
        "prefixes",
        "Tensor of std::string containing prefixes for each input.");

OPERATOR_SCHEMA(StringSuffix)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the element-wise string suffix of the string tensor.
Input strings that are shorter than suffix length will be returned unchanged.
NOTE: Suffix is computed on number of bytes, which may lead to wrong behavior
and potentially invalid strings for variable-length encodings such as utf-8.
)DOC")
    .Input(0, "strings", "Tensor of std::string.")
    .Output(
        0,
        "suffixes",
        "Tensor of std::string containing suffixes for each input.")
    .Arg("length", "Maximum size of the suffix, in bytes.");

OPERATOR_SCHEMA(StringStartsWith)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Performs the starts-with check on each string in the input tensor.
Returns a tensor of booleans of the same dimension as the input.
)DOC")
    .Arg("prefix", "The prefix to check input strings against.")
    .Input(0, "strings", "Tensor of std::string.")
    .Output(0, "bools", "Tensor of bools of same shape as input.");

OPERATOR_SCHEMA(StringEndsWith)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Performs the ends-with check on each string in the input tensor.
Returns a tensor of booleans of the same dimension as the input.
)DOC")
    .Arg("suffix", "The suffix to check input strings against.")
    .Input(0, "strings", "Tensor of std::string.")
    .Output(0, "bools", "Tensor of bools of same shape as input.");

SHOULD_NOT_DO_GRADIENT(StringPrefix);
SHOULD_NOT_DO_GRADIENT(StringSuffix);
SHOULD_NOT_DO_GRADIENT(StringStartsWith);
SHOULD_NOT_DO_GRADIENT(StringEndsWith);
}
} // namespace caffe2
caffe2/operators/string_ops.h (new file, 13 lines)
@ -0,0 +1,13 @@
#pragma once
#include "caffe2/core/operator.h"
#include "caffe2/operators/elementwise_op.h"

namespace caffe2 {

template <typename ScalarFunctor, typename OutputType = std::string>
using StringElementwiseOp = UnaryElementwiseWithArgsOp<
    TensorTypes<std::string>,
    CPUContext,
    ForEach<ScalarFunctor>,
    OutputType>;
}
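
A small hypothetical round trip through these operators (blob names illustrative; Python 2 strings feed as a numpy object array):

from caffe2.python import core, workspace
import numpy as np

workspace.FeedBlob('strings', np.array(['caffe2', 'cafe', 'net'], dtype=object))
workspace.RunOperatorOnce(
    core.CreateOperator('StringPrefix', ['strings'], ['prefixes'], length=4))
workspace.RunOperatorOnce(
    core.CreateOperator('StringStartsWith', ['strings'], ['bools'], prefix='caf'))
print(workspace.FetchBlob('prefixes'))  # expected: ['caff', 'cafe', 'net']
print(workspace.FetchBlob('bools'))     # expected: [True, True, False]
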
@ -10,24 +10,25 @@ namespace caffe2 {
#define COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS 5

namespace {
// TODO(jiayq): one possible optimization is to copy the buffer into a shared memory
// location to speed up access.
// TODO(jiayq): one possible optimization is to copy the buffer into a shared
// memory location to speed up access.
template <typename Dtype>
__global__ void transpose_gpu(const int nthreads, const Dtype* from_data,
    Dtype* to_data, const int* buffer, const int num_axes) {
  int from_inds[COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS];
  const int* from_counts = buffer;
  const int* to_counts = buffer + num_axes;
  const int* map = buffer + num_axes * 2;
  const int* axes = buffer + num_axes * 2;
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int from_index = index, to_index = 0;
    for (int i = 0; i < num_axes; i++) {
      from_inds[i] = from_index / from_counts[i];
      from_index = from_index % from_counts[i];
    for (int i = num_axes - 1; i >= 0; --i) {
      from_inds[i] = from_index % from_counts[i];
      from_index = from_index / from_counts[i];
    }
    for (int i = 0; i < num_axes; i++) {
      to_index += from_inds[map[i]] * to_counts[i];
    for (int i = 0; i < num_axes - 1; i++) {
      to_index = (to_index + from_inds[axes[i]]) * to_counts[i + 1];
    }
    to_index += from_inds[axes[num_axes - 1]];
    to_data[to_index] = from_data[index];
  }
}

@ -42,7 +43,7 @@ bool TransposeOp<CUDAContext>::DoRunWithType() {
  int ndim = input.ndim();
  CAFFE_ENFORCE(count < std::numeric_limits<int>::max(),
                "Transpose op on GPU only supports int32");
  CAFFE_ENFORCE(count < COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS,
  CAFFE_ENFORCE(ndim < COMPILE_TIME_CUDA_MAX_TRANSPOSE_DIMS,
                "Input ndim exceeds compile time max.");
  // Buffer contains the following data:
  // (1) the dimensions of the inputs
|
|||
}
|
||||
Y->Resize(new_dims_);
|
||||
// Do the actual transpose, which is implemented in DoRunWithType().
|
||||
return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
|
||||
return DispatchHelper<TensorTypes<float, double, int, long>>::call(
|
||||
this, Input(0));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ REGISTER_CPU_OPERATOR(ScatterAssign, ScatterAssignOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(Copy, CopyOp<CPUContext, CPUContext, CPUContext>);
REGISTER_CPU_OPERATOR(Shape, ShapeOp<CPUContext>);
REGISTER_CPU_OPERATOR(HasElements, HasElementsOp<CPUContext>);
REGISTER_CPU_OPERATOR(IsEmpty, IsEmptyOp<CPUContext>);
REGISTER_CPU_OPERATOR(Gather, GatherOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(Unique, UniqueOp<CPUContext>);
REGISTER_CPU_OPERATOR(LengthsToSegmentIds, LengthsToSegmentIdsOp<CPUContext>);

@ -24,6 +25,7 @@ REGISTER_CPU_OPERATOR(SegmentIdsToLengths, SegmentIdsToLengthsOp<CPUContext>);
REGISTER_CPU_OPERATOR(Slice, SliceOp<int, CPUContext>);
REGISTER_CPU_OPERATOR(Squeeze, SqueezeOp<CPUContext>);
REGISTER_CPU_OPERATOR(ExpandDims, ExpandDimsOp<CPUContext>);
REGISTER_CPU_OPERATOR(And, AndOp<CPUContext>);

OPERATOR_SCHEMA(Print)
    .NumInputs(1)

@ -209,6 +211,13 @@ OPERATOR_SCHEMA(HasElements)
        "has_elements",
        "Scalar bool tensor. True if input is not empty.");

OPERATOR_SCHEMA(IsEmpty)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Returns true iff the input tensor has size == 0")
    .Input(0, "tensor", "Tensor of any type.")
    .Output(0, "is_empty", "Scalar bool tensor. True if input is empty.");

OPERATOR_SCHEMA(Gather)
    .NumInputs(2)
    .NumOutputs(1)

@ -340,9 +349,21 @@ If the same blob is provided in input and output, the operation is copy-free.
    .Input(0, "data", "Original tensor")
    .Output(0, "expanded", "Reshaped tensor with same data as input.");

OPERATOR_SCHEMA(And)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .SetDoc(R"DOC(
Outputs true iff both input blob values are true.
)DOC")
    .Input(0, "input_0", "first boolean input.")
    .Input(1, "input_1", "second boolean input.")
    .Output(0, "output", "input_0 && input_1.");

SHOULD_NOT_DO_GRADIENT(Print);
SHOULD_NOT_DO_GRADIENT(Shape);
SHOULD_NOT_DO_GRADIENT(HasElements);
SHOULD_NOT_DO_GRADIENT(IsEmpty);

class GetSqueezeGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

@ -433,6 +454,7 @@ SHOULD_NOT_DO_GRADIENT(LengthsToSegmentIds);
SHOULD_NOT_DO_GRADIENT(SegmentIdsToLengths);
// TODO(azzolini): Add support for slice gradient
SHOULD_NOT_DO_GRADIENT(Slice);
SHOULD_NOT_DO_GRADIENT(And);

} // namespace
@ -675,6 +675,21 @@ class HasElementsOp : public Operator<Context> {
  }
};

template <class Context>
class IsEmptyOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(IsEmptyOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = OperatorBase::Output<TensorCPU>(0);
    output->Resize(std::vector<TIndex>{});
    *output->template mutable_data<bool>() = (input.size() == 0);
    return true;
  }
};

// RecordShapeOp records the shape of the input tensor to a vector of int. You
// mostly don't need this operator explicitly, and it is mostly used in the
// autodiff process.

@ -911,6 +926,23 @@ class UniqueOp : public Operator<Context> {
 public:
  OUTPUT_TAGS(UNIQUE, REMAPPING);
};

template <class Context>
class AndOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  AndOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    const auto* i1 = Input(0).template data<bool>();
    const auto* i2 = Input(1).template data<bool>();
    auto* output = Output(0);
    output->Resize(std::vector<int>{});
    *output->template mutable_data<bool>() = (*i1 && *i2);
    return true;
  }
};
} // namespace caffe2

#endif // CAFFE2_OPERATORS_UTILITY_OPS_H_
@ -199,7 +199,7 @@ message ExecutionStep {
  // Criteria network specifies a single output (TensorCPU<bool>) of
  // size (1), is run on every iteration by the executor, and
  // execution terminates when the output[0] is `false`.
  optional string criteria_network = 5;
  optional string criteria_network = 5 [deprecated=true];

  // If specified, run report_net asynchronously every `report_interval`
  // seconds. Report_net is guaranteed to run at least once after all

@ -210,6 +210,20 @@ message ExecutionStep {
  // If false or not set, execute sub-steps serially.
  // If true, execute all substeps concurrently, each one in a separate thread.
  optional bool concurrent_substeps = 6;

  // Name of a scalar boolean tensor.
  // ES checks this blob AFTER every substep/subnet.
  // If specified, and the value is true, then ES will skip the rest and return
  // immediately.
  // This means that the report_net and the first step will always be called.
  // Use cases:
  // 1) the first substep stops the rest if the data condition is not met;
  // 2) the first substep decides which of the remaining steps should run;
  // 3) external control.
  //
  // ** It is the user's responsibility to not put this blob in race conditions,
  // ** for example when setting this blob in concurrent substeps.
  optional string should_stop_blob = 9;
}

message PlanDef {
@ -87,7 +87,6 @@ const TypeMeta& NumpyTypeToCaffe(int numpy_type) {
  {NPY_UINT8, TypeMeta::Make<uint8_t>()},
  {NPY_UINT16, TypeMeta::Make<uint16_t>()},
  {NPY_OBJECT, TypeMeta::Make<std::string>()},
  {NPY_STRING, TypeMeta::Make<std::string>()},
  // Note: Add more types here.
};
static TypeMeta unknown_type;

@ -565,11 +564,10 @@ PyObject* FetchBlob(PyObject* self, PyObject* args) {

PyObject* FeedBlob(PyObject* self, PyObject* args) {
  char* name_char;
  PyArrayObject* array = nullptr;
  PyObject* arg = nullptr;
  PyObject* device_option_string = nullptr;
  // TODO(dzhulgakov): implement accepting other types (at least string)
  if (!PyArg_ParseTuple(args, "sO!|O", &name_char, &PyArray_Type, &array,
                        &device_option_string)) {
  if (!PyArg_ParseTuple(
          args, "sO|O", &name_char, &arg, &device_option_string)) {
    PyErr_SetString(PyExc_ValueError, "Incorrect arguments.");
    return nullptr;
  }

@ -584,13 +582,25 @@ PyObject* FeedBlob(PyObject* self, PyObject* args) {
  }
  Blob* blob = gWorkspace->CreateBlob(name);

  if (PyArray_Check(arg)) { // numpy array
    PyArrayObject* array = reinterpret_cast<PyArrayObject*>(arg);
    auto feeder = CreateFeeder(option.device_type());
    if (!feeder) {
      PyErr_SetString(PyExc_TypeError,
                      "Unknown device type encountered in FeedBlob.");
      PyErr_SetString(
          PyExc_TypeError, "Unknown device type encountered in FeedBlob.");
      return nullptr;
    }
    return feeder->Feed(option, array, blob);
  } else if (PyString_Check(arg)) { // string
    *blob->GetMutable<std::string>() = PyBytesToStdString(arg);
    Py_RETURN_TRUE;
  } else {
    PyErr_SetString(
        PyExc_ValueError,
        "Unexpected type of argument - only numpy array or string are "
        "supported for feeding");
    return nullptr;
  }
}

// A simple macro to avoid writing repeated symbols.

@ -620,7 +630,7 @@ PyMethodDef* GetCaffe2PythonMethods() {
    {"cc_RunPlan", RunPlan, METH_VARARGS, ""},
    _PYNAME(CreateBlob),
    _PYNAME(SerializeBlob),
    _PYNAME(FetchBlob),
    {"cc_FetchBlob", FetchBlob, METH_VARARGS, ""},
    {"cc_FeedBlob", FeedBlob, METH_VARARGS, ""},
    {nullptr, nullptr, 0, nullptr}, // end of python methods.
};
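
With this change FeedBlob accepts either a numpy array or a raw string; a sketch of the new behavior as seen through the Python wrapper (assuming workspace.FeedBlob forwards to cc_FeedBlob):

from caffe2.python import workspace
import numpy as np

workspace.FeedBlob('arr', np.ones((2, 2), dtype=np.float32))  # numpy array path
workspace.FeedBlob('msg', 'raw string payload')               # new string path
# Anything else (e.g. a dict) now fails with ValueError instead of a
# TypeError raised inside PyArg_ParseTuple.
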
@ -160,7 +160,10 @@ class TensorFetcher : public BlobFetcherBase {
        Py_DECREF(outObj[j]);
      }
      Py_DECREF(array);
      LOG(FATAL) << "Failed to allocate string for ndarray of strings.";
      PyErr_SetString(
          PyExc_TypeError,
          "Failed to allocate string for ndarray of strings.");
      return nullptr;
    }
  }
  return array;

@ -217,21 +220,14 @@ class TensorFeeder : public BlobFeederBase {
        char* str;
        Py_ssize_t strSize;
        if (PyBytes_AsStringAndSize(input[i], &str, &strSize) == -1) {
          LOG(FATAL) << "Unsupported pyhton object type passed into ndarray.";
          PyErr_SetString(
              PyExc_TypeError,
              "Unsupported python object type passed into ndarray.");
          return nullptr;
        }
        outPtr[i] = std::string(str, strSize);
      }
    } break;
    case NPY_STRING: {
      char* inputData = PyArray_BYTES(array);
      auto* outPtr = tensor->template mutable_data<std::string>();
      auto itemSize = PyArray_ITEMSIZE(array);
      for (int i = 0; i < tensor->size(); ++i) {
        auto start = inputData + i * itemSize;
        auto end = std::find(start, start + itemSize, '\0');
        outPtr[i] = std::string(start, end - start);
      }
    } break;
    default:
      context.template CopyBytes<CPUContext, Context>(
          tensor->size() * meta.itemsize(),
@ -613,6 +613,7 @@ def GetArgumentParser():
    )
    parser.add_argument("--net_type", type=str, default="dag")
    parser.add_argument("--num_workers", type=int, default=2)
    parser.add_argument("--use-nvtx", default=False, action='store_true')
    return parser


@ -624,7 +625,9 @@ if __name__ == '__main__':
    ):
        GetArgumentParser().print_help()

    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0'] +
        (['--caffe2_use_nvtx'] if args.use_nvtx else []))
    model_map = {
        'AlexNet': AlexNet,
        'OverFeat': OverFeat,
|
|||
return BlobReference(scope.NAMESCOPE + name, *args, **kwargs)
|
||||
|
||||
|
||||
def _RectifyInputOutput(blobs):
|
||||
def _RectifyInputOutput(blobs, net=None):
|
||||
"""A helper function to rectify the input or output of the CreateOperator
|
||||
interface.
|
||||
"""
|
||||
|
|
@ -154,18 +154,18 @@ def _RectifyInputOutput(blobs):
|
|||
# If blobs is a single string, prepend scope.NAMESCOPE and put it as a
|
||||
# list.
|
||||
# TODO(jiayq): enforce using BlobReference instead of raw strings.
|
||||
return [ScopedBlobReference(blobs)]
|
||||
return [ScopedBlobReference(blobs, net=net)]
|
||||
elif type(blobs) is BlobReference:
|
||||
# If blob is a BlobReference, simply put it as a list.
|
||||
return [BlobReference(str(blobs))]
|
||||
elif type(blobs) is list:
|
||||
return [blobs]
|
||||
elif type(blobs) in (list, tuple):
|
||||
# If blob is a list, we go through it and type check.
|
||||
rectified = []
|
||||
for blob in blobs:
|
||||
if isinstance(blob, basestring):
|
||||
rectified.append(ScopedBlobReference(blob))
|
||||
rectified.append(ScopedBlobReference(blob, net=net))
|
||||
elif type(blob) is BlobReference:
|
||||
rectified.append(BlobReference(str(blob)))
|
||||
rectified.append(blob)
|
||||
else:
|
||||
raise TypeError(
|
||||
"I/O blob #{} of unsupported type: {} of type {}"
|
||||
|
|
@ -670,8 +670,19 @@ def get_op_ids_in_path(ssa, blob_versions, inputs, outputs):


class Net(object):
    _net_names_used = set()
    operator_registry_ = {}

    @staticmethod
    def _get_next_net_name(basename):
        name = basename
        next_idx = 1
        while name in Net._net_names_used:
            name = basename + '_' + str(next_idx)
            next_idx += 1
        Net._net_names_used |= set([name])
        return name

    def __init__(self, name_or_proto):
        """
        Create a Net.
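
A short illustration of the dedup behavior (hypothetical session): creating two nets with the same base name yields distinct proto names.

from caffe2.python import core

a = core.Net('train')
b = core.Net('train')
print(str(a), str(b))  # expected: 'train' and 'train_1'
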
@ -706,29 +717,29 @@ class Net(object):
            else:
                self._next_name_index = 0
        else:
            name = name_or_proto
            self._net = caffe2_pb2.NetDef()
            self._net.name = name
            self._net.name = name_or_proto
            self._next_name_index = 0

        # make sure that this net name hasn't been used before
        self._net.name = Net._get_next_net_name(self._net.name)

    def __str__(self):
        return self._net.name

    def DefinesBlob(self, blob):
    def BlobIsDefined(self, blob):
        """
        Returns true if the given BlobReference is produced as output of
        an operator in this net, or if it is provided as an external input.
        """
        if isinstance(blob, BlobReference):
            assert blob.Net() == self, 'Reference belongs to different net'
        blob_name = str(blob)
        for input in self._net.external_input:
            if input == blob_name:
                return True
        for op in self._net.op:
            for output in op.output:
                if output == blob_name:
                    return True
        for input in self._net.external_input:
            if input == blob_name:
                return True
        return False

    def UsesBlob(self, blob):

@ -753,7 +764,7 @@ class Net(object):
        raises KeyError.
        """
        blob_name = str(blob_name)
        if not self.DefinesBlob(blob_name):
        if not self.BlobIsDefined(blob_name):
            raise KeyError('Net does not define blob %s' % blob_name)
        return BlobReference(blob_name, self)
@ -818,13 +829,16 @@ class Net(object):
            new_outputs: list of BlobReferences corresponding to the
            outputs produced by new_net.
        """
        inputs = inputs if isinstance(inputs, dict) else {i: i for i in inputs}
        input_is_pair_list = isinstance(inputs, list) and all(
            isinstance(i, tuple) and len(i) == 2 for i in inputs)
        inputs = (
            inputs if isinstance(inputs, (dict, OrderedDict)) else
            OrderedDict(inputs) if input_is_pair_list else
            OrderedDict(zip(inputs, inputs)))
        for output in outputs:
            assert self.BlobIsDefined(output)
        input_names = {str(k): str(v) for k, v in inputs.items()}
        output_names = [str(o) for o in outputs]
        for input in inputs.keys():
            assert self.UsesBlob(input)
        for output in outputs:
            assert self.DefinesBlob(output)
        proto = self._net
        ssa, blob_versions = get_ssa(proto)
        used_op_ids = get_op_ids_in_path(ssa, blob_versions, inputs, outputs)
@ -859,9 +873,21 @@ class Net(object):
    def Proto(self):
        return self._net

    def NextName(self):
    def NextName(self, prefix=None, output_id=None):
        """Returns the next name to be used, if you do not want to explicitly
        name your blob."""
        if prefix:
            output_name_base = self._net.name + '/' + prefix
            output_name = output_name_base
            if output_id is not None:
                output_name += ':' + str(output_id)
            index = 2
            while self.BlobIsDefined(output_name):
                output_name = output_name_base + '_' + str(index)
                if output_id is not None:
                    output_name += ':' + str(output_id)
                index += 1
        else:
            output_name = self._net.name + '_blob_' + str(self._next_name_index)
            self._next_name_index += 1
        return str(output_name)
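
With the new prefix/output_id parameters, auto-generated blob names become self-describing; the exact results depend on which blobs the net already defines (sketch):

from caffe2.python import core

net = core.Net('example')
print(net.NextName(prefix='Relu'))               # e.g. 'example/Relu'
print(net.NextName(prefix='Relu', output_id=0))  # e.g. 'example/Relu:0'
print(net.NextName())                            # e.g. 'example_blob_0'
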
@ -900,16 +926,18 @@ class Net(object):
        self._net.op.extend(grad_ops)
        return input_to_grad

    def AddExternalInput(self, input_name):
        input_name = str(input_name)
    def AddExternalInput(self, input):
        input_name = str(input)
        assert input_name not in self._net.external_input, (
            'Net already contains an input named %s' % input_name)
        self._net.external_input.extend([input_name])
        return BlobReference(input_name, self)
        return (
            input if isinstance(input, BlobReference)
            else BlobReference(input_name))

    def AddExternalOutput(self, output):
        assert isinstance(output, BlobReference)
        assert self.DefinesBlob(output)
        assert self.BlobIsDefined(output)
        self.Proto().external_output.extend([str(output)])

    def DeduplicateGradientSlices(self, g):
@ -931,14 +959,22 @@ class Net(object):
    def _CreateAndAddToSelf(self, op_type, inputs, outputs=None, **kwargs):
        """A helper function to create an operator and add it to self.
        """
        inputs = _RectifyInputOutput(inputs)
        for input in inputs:
            if not self.BlobIsDefined(input):
                assert input.Net() != self
                self.AddExternalInput(input)
        if outputs is None:
            # If we do not specify an output, we will assume that this op
            # produces one output in this case.
            outputs = self.NextName()
            outputs = self.NextName(prefix=op_type)
        elif type(outputs) is int:
            # In this case, we will auto-fill the given number of outputs
            # with auto-generated names.
            outputs = [self.NextName() for i in range(outputs)]
            outputs = [
                self.NextName(prefix=op_type, output_id=i)
                for i in range(outputs)]
        outputs = _RectifyInputOutput(outputs, net=self)
        op = CreateOperator(op_type, inputs, outputs, **kwargs)
        self._net.op.extend([op])
        if len(op.output) == 0:
@ -1036,10 +1072,11 @@ class ExecutionStep(object):
        self._assert_can_mutate()
        self._step.num_iter = num_iter

    def SetCriteriaNet(self, criteria_net):
    def SetShouldStopBlob(self, should_stop_blob):
        assert isinstance(should_stop_blob, BlobReference), (
            "expects BlobReference here, got {}".format(type(should_stop_blob)))
        self._assert_can_mutate()
        _add_net_to_dict(self._net_dict, criteria_net)
        self._step.criteria_network = get_net_name(criteria_net)
        self._step.should_stop_blob = str(should_stop_blob)

    def SetReportNet(self, report_net, report_interval):
        self._assert_can_mutate()
@ -1053,7 +1090,7 @@ class ExecutionStep(object):
        if isinstance(substep, ExecutionStep):
            substep._notify_is_used()
            if not substep.HasNets() and not substep.HasSubsteps():
                return
                return self
            for net in substep.Nets():
                _add_net_to_dict(self._net_dict, net)
            self._substeps.append(substep)

@ -1061,6 +1098,7 @@ class ExecutionStep(object):
        else:
            proto = substep
        self._step.substep.add().CopyFrom(proto)
        return self

    def SetConcurrentSubsteps(self, concurrent_substeps):
        self._assert_can_mutate()

@ -1073,6 +1111,7 @@ class ExecutionStep(object):
        assert isinstance(net, Net)
        _add_net_to_dict(self._net_dict, net)
        self._step.network.extend([get_net_name(net)])
        return self


class Plan(object):
@ -1107,11 +1146,11 @@ class Plan(object):

def execution_step(default_name,
                   steps_or_nets,
                   criteria=None,
                   num_iter=None,
                   report_net=None,
                   report_interval=None,
                   concurrent_substeps=None):
                   concurrent_substeps=None,
                   should_stop_blob=None):
    """
    Helper for creating an ExecutionStep.
    - steps_or_nets can be:

@ -1120,18 +1159,20 @@ def execution_step(default_name,
      - ExecutionStep
      - list<Net>
      - list<ExecutionStep>
    - criteria is either None or a Net
    - if no criteria or num_iter is provided, defaults to num_iter=1
    - should_stop_blob is either None or a scalar boolean blob.
      - This blob is checked AFTER every substep/subnet.
      - If specified and true, then this step will return immediately.
      - Be sure to handle race conditions if setting from concurrent threads.
    - if no should_stop_blob or num_iter is provided, defaults to num_iter=1
    """
    assert criteria is None or isinstance(criteria, Net)
    assert criteria is None or num_iter is None, (
        'Cannot set both criteria and num_iter.')
    if criteria is None and num_iter is None:
    assert should_stop_blob is None or num_iter is None, (
        'Cannot set both should_stop_blob and num_iter.')
    if should_stop_blob is None and num_iter is None:
        num_iter = 1

    def set_criteria(step):
        if criteria is not None:
            step.SetCriteriaNet(criteria)
    def set_step_attr(step):
        if should_stop_blob is not None:
            step.SetShouldStopBlob(should_stop_blob)
        else:
            step.SetIter(num_iter)
        if concurrent_substeps is not None:

@ -1144,18 +1185,20 @@ def execution_step(default_name,
    if not steps_or_nets:
        return ExecutionStep(default_name)
    if isinstance(steps_or_nets, ExecutionStep):
        return set_criteria(steps_or_nets)
        step = set_step_attr(ExecutionStep(default_name))
        step.AddSubstep(steps_or_nets)
        return step
    elif isinstance(steps_or_nets, Net):
        step = set_criteria(ExecutionStep(default_name))
        step = set_step_attr(ExecutionStep(default_name))
        step.AddNet(steps_or_nets)
        return step
    elif isinstance(steps_or_nets, list):
        if isinstance(steps_or_nets[0], Net):
            step = set_criteria(ExecutionStep(default_name))
            step = set_step_attr(ExecutionStep(default_name))
            map(step.AddNet, steps_or_nets)
            return step
        elif isinstance(steps_or_nets[0], ExecutionStep):
            step = set_criteria(ExecutionStep(default_name))
            step = set_step_attr(ExecutionStep(default_name))
            map(step.AddSubstep, steps_or_nets)
            return step
        else:
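
A hedged sketch of the new early-stopping control flow (names hypothetical; in a real plan some op in the loop body would eventually flip the stop blob to True):

from caffe2.python import core

net = core.Net('loop_body')
counter = net.ConstantFill([], 'counter', shape=[], value=0.0)
done = net.IsEmpty([counter], 'done')  # scalar bool blob checked after each pass
step = core.execution_step('loop', net, should_stop_blob=done)
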
caffe2/python/dataset.py (new file, 276 lines)
@ -0,0 +1,276 @@
"""
Implementation of an in-memory dataset with structured schema.

Use this to store and iterate through datasets with complex schema that
fit in memory.

Iterating through entries of this dataset is very fast since the dataset
is stored as a set of native Caffe2 tensors, thus no type conversion or
deserialization is necessary.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, workspace
from caffe2.python.io import Reader, Writer
from caffe2.python.schema import Struct
import numpy as np


class _DatasetReader(Reader):
    def __init__(self, field_names, field_blobs, cursor, name):
        """Don't call this directly. Instead, use dataset.reader()"""
        self.field_names = field_names
        self.field_blobs = field_blobs
        self.cursor = cursor
        self.name = name

    def read(self, read_net, batch_size=1):
        with core.NameScope(read_net.NextName(self.name)):
            fields = read_net.ReadNextBatch(
                [self.cursor] + self.field_blobs,
                self.field_names,
                batch_size=batch_size)
        return (read_net.IsEmpty([fields[0]]), fields)

    def reset(self, net):
        net.ResetCursor([self.cursor], [])


class _DatasetRandomReader(Reader):
    def __init__(self, field_names, field_blobs, cursor, name, indices):
        """Don't call this directly. Instead, use dataset.random_reader()"""
        self.field_names = field_names
        self.field_blobs = field_blobs
        self.cursor = cursor
        self.name = name
        self.indices = indices

    def reset(self, net):
        net.ResetCursor([self.cursor], [])

    def computeoffset(self, net):
        self.reset(net)
        offsets = net.ComputeOffset(
            [self.cursor] + self.field_blobs,
            'offsets')
        self.offsets = offsets

    def read(self, read_net, batch_size=1):
        fields = read_net.ReadRandomBatch(
            [self.cursor, self.indices, self.offsets] + self.field_blobs,
            self.field_names,
            batch_size=batch_size)
        return (read_net.IsEmpty([fields[0]]), fields)


class _DatasetWriter(Writer):
    def __init__(self, fields, field_blobs, init_net):
        """Don't call this directly. Use dataset.writer() instead."""
        self.fields = fields
        self.field_blobs = field_blobs
        self.mutex = init_net.CreateMutex([])

    def write(self, writer_net, fields):
        """
        Add operations to `net` that append the blobs in `fields` to the end
        of the dataset. An additional operator will also be added that checks
        the consistency of the data in `fields` against the dataset schema.

        Args:
            writer_net: The net that will contain the Append operators.
            fields: A list of BlobReference to be appended to this dataset.
        """
        assert len(fields) == len(self.fields), (
            'Expected %s fields, got %s.' % (len(self.fields), len(fields)))
        writer_net.CheckDatasetConsistency(fields, [], fields=self.fields)
        writer_net.AtomicAppend(
            [self.mutex] + list(self.field_blobs) + list(fields),
            self.field_blobs)

    def commit(self, finish_net):
        """Commit is a no-op for an in-memory dataset."""
        pass


def to_ndarray_list(values, schema):
    """
    Given a list of values and a dataset schema, produce a list of ndarrays in
    the right format.

    This function will perform some checks to make sure that the arrays
    produced have the right dtype and rank.
    """
    assert isinstance(schema, Struct), 'schema must be a Struct.'
    names = schema.field_names()
    types = schema.field_types()
    assert len(types) == len(values), (
        'Values must have %d elements, got %d' % (len(types), len(values)))

    arrays = []
    for value, dtype, name in zip(values, types, names):
        array = np.array(value, dtype=dtype.base)
        # if array is empty we may need to reshape a little
        if array.size == 0:
            array = array.reshape((0,) + dtype.shape)
        # check that the inner dimensions match the schema
        assert (array.shape[1:] == dtype.shape), (
            'Invalid array shape for field %s. Expected (%s), got (%s).' % (
                name,
                ', '.join(['_'] + map(str, dtype.shape)),
                ', '.join(map(str, array.shape))))
        arrays.append(array)
    return arrays


def Const(net, value, dtype=None, name=None):
    """
    Create a 'constant' by first creating an external input in the given
    net, and then feeding the corresponding blob with its provided value
    in the current workspace. The name is automatically generated in order
    to avoid clashes with existing blob names.
    """
    assert isinstance(net, core.Net), 'net must be a core.Net instance.'
    value = np.array(value, dtype=dtype)
    blob = net.AddExternalInput(net.NextName(prefix=name))
    workspace.FeedBlob(str(blob), value)
    return blob


class Dataset(object):
    """Represents an in-memory dataset with fixed schema.

    Use this to store and iterate through datasets with complex schema that
    fit in memory.

    Iterating through entries of this dataset is very fast since the dataset
    is stored as a set of native Caffe2 tensors, thus no type conversion or
    deserialization is necessary.
    """

    def __init__(self, fields, name=None):
        """Create an un-initialized dataset with schema provided by `fields`.

        Before this dataset can be used, it must be initialized, either by
        `init_empty` or `init_from_dataframe`.

        Args:
            fields: either a schema.Struct or a list of field names in a
                format compatible with the one described in schema.py.
            name: optional name to prepend to blobs that will store the data.
        """
        assert isinstance(fields, list) or isinstance(fields, Struct), (
            'fields must be either a Struct or a list of raw field names.')
        self.schema = fields
        self.fields = (
            fields.field_names() if isinstance(fields, Struct) else fields)
        self.field_types = (
            fields.field_types() if isinstance(fields, Struct) else
            [np.dtype(np.void)] * len(self.fields))
        self.name = name or 'dataset'
        self.field_blobs = None

    def init_empty(self, init_net):
        """Initialize the blobs for this dataset with empty values.

        Empty arrays will be immediately fed into the current workspace,
        and `init_net` will take those blobs as external inputs.
        """
        self.field_blobs = [Const(init_net, [], name=f) for f in self.fields]

    def init_from_dataframe(self, net, dataframe):
        """Initialize the blobs for this dataset from a Pandas dataframe.

        Each column of the dataframe will be immediately fed into the current
        workspace, and the `net` will take these blobs as external inputs.
        """
        assert len(self.fields) == len(dataframe.columns)
        self.field_blobs = [
            Const(net, dataframe.as_matrix([col]).flatten(), name=field)
            for col, field in enumerate(self.fields)]

    def get_blobs(self):
        """
        Return the list of BlobReference pointing to the blobs that contain
        the data for this dataset.
        """
        assert self
        return self.field_blobs

    def field_names(self):
        """Return the list of field names for this dataset."""
        return self.fields

    def field_types(self):
        """
        Return the list of field dtypes for this dataset.

        If a list of strings, not a schema.Struct, was passed to the
        constructor, this will return a list of dtype(np.void).
        """
        return self.field_types

    def reader(self, init_net, cursor_name=None):
        """Create a Reader object that is used to iterate through the dataset.

        This will append operations to `init_net` that create a TreeCursor,
        used to iterate through the data.

        NOTE: Currently, it is not safe to append to a dataset while reading.

        Args:
            init_net: net that will be run once to create the cursor.
            cursor_name: optional name for the blob containing a pointer
                to the cursor.

        Returns:
            A _DatasetReader that can be used to create operators that will
            iterate through the dataset.
        """
        assert self.field_blobs, 'Dataset not initialized.'
        cursor_name = cursor_name or (self.name + '_cursor')
        cursor = init_net.CreateTreeCursor(
            [],
            [cursor_name],
            fields=self.fields)
        return _DatasetReader(
            self.fields, self.field_blobs, cursor, cursor_name)

    def random_reader(self, init_net, indices, cursor_name=None):
        """Create a Reader object that is used to iterate through the dataset.

        NOTE: The reader order depends on the order in indices.

        Args:
            Similar to reader()
            indices: blob containing the reading order

        Returns:
            A _DatasetRandomReader that can be used to create operators that
            will iterate through the dataset according to indices.
        """
        assert self.field_blobs, 'Dataset not initialized.'
        cursor_name = cursor_name or (self.name + '_cursor')
        cursor = init_net.CreateTreeCursor(
            [],
            [cursor_name],
            fields=self.fields)
        return _DatasetRandomReader(
            self.fields, self.field_blobs, cursor, cursor_name, indices)

    def writer(self, init_net):
        """Create a Writer that can be used to append entries into the dataset.

        NOTE: Currently, it is not safe to append to a dataset
        while reading from it.
        NOTE: The current implementation of the writer is not thread safe.
        TODO: fix me

        Args:
            init_net: net that will be run once in order to create the writer.
                (currently not used)
        """
        assert self.field_blobs, 'Dataset not initialized.'
        return _DatasetWriter(self.fields, self.field_blobs, init_net)
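
A hedged end-to-end sketch of the intended workflow, mirroring how the module itself wires the mutex and cursor (field names and values illustrative, untested):

import numpy as np
from caffe2.python import core, workspace
from caffe2.python.dataset import Const, Dataset

ds = Dataset(['label', 'weight'], name='mini')
init_net = core.Net('init')
ds.init_empty(init_net)
reader = ds.reader(init_net)   # adds CreateTreeCursor to init_net
writer = ds.writer(init_net)   # adds CreateMutex to init_net
workspace.RunNetOnce(init_net)

# Append one batch of two rows, then stream the first row back.
append_net = core.Net('append')
writer.write(append_net, [
    Const(append_net, np.array([0, 1], dtype=np.int32)),
    Const(append_net, np.array([0.5, 1.5], dtype=np.float32))])
workspace.RunNetOnce(append_net)

read_net = core.Net('read')
should_stop, fields = reader.read(read_net, batch_size=1)
workspace.RunNetOnce(read_net)  # fetch the `fields` blobs to see the row
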
@@ -1,6 +1,6 @@
import numpy as np
import copy
from caffe2.python import core, workspace
from caffe2.python import workspace


class DeviceChecker(object):

@@ -41,9 +41,8 @@ class DeviceChecker(object):
            op.device_option.CopyFrom(device_option)
            workspace.RunOperatorOnce(op)
            results.append(
                [workspace.FetchBlob(op.output[idx]) for idx in outputs_to_check
                 ]
                )
                [workspace.FetchBlob(op.output[idx])
                 for idx in outputs_to_check])
            # Everything is done, reset the workspace.
            workspace.ResetWorkspace()
        # After running on all devices, check correctness

@@ -61,15 +60,15 @@ class DeviceChecker(object):
                    print(y.flatten())
                    print(np.max(np.abs(x - y)))
                    success = False
                #else:
                # else:
                #     print ('Passed device pair (0, %d), %s %s' %
                #            (i, outputs_to_check[j], y.shape))
        workspace.SwitchWorkspace(old_ws_name)
        return success

    def CheckNet(self, net, inputs={}, blobs_to_check=None, ignore=set()):
        """Checks a network by inspecting all of its intermediate results, and see
        if things match.
        """Checks a network by inspecting all of its intermediate results, and
        see if things match.
        """
        old_ws_name = workspace.CurrentWorkspace()
        results = []

@@ -78,8 +77,8 @@ class DeviceChecker(object):
        blobs_to_check = [b for b in blobs_to_check if b not in ignore]
        workspace.SwitchWorkspace("_device_check_", True)
        for i, device_option in enumerate(self._device_options):
            for name, arr in inputs.iteritems():
                #print 'feeding', name
            for name, arr in inputs.items():
                # print 'feeding', name
                workspace.FeedBlob(name, arr, device_option)
            for op in net.op:
                op.device_option.CopyFrom(device_option)

@@ -93,15 +92,18 @@ class DeviceChecker(object):
            for j in range(len(blobs_to_check)):
                x = results[i][j]
                y = results[0][j]
                if np.any(np.abs(x - y) > self._threshold):
                if not np.allclose(x, y,
                                   atol=self._threshold, rtol=self._threshold):
                    print('Failure in checking device option {}'
                          ' and output {}. The outputs are:'
                          .format(i, blobs_to_check[j]))
                    print(x.flatten())
                    print(y.flatten())
                    print(np.max(np.abs(x - y)))
                    success = False
                #else:
                # else:
                #     print ('Passed device pair (%d, %d), %s %s: %s' %
                #            (i, j, blobs_to_check[j], y.shape, str(y.flatten())))
                #            (i, j, blobs_to_check[j], y.shape,
                #             str(y.flatten())))
        workspace.SwitchWorkspace(old_ws_name)
        return success
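The switch from a plain elementwise threshold to np.allclose changes the acceptance test from |x - y| <= t to |x - y| <= atol + rtol * |y|, which is more forgiving for large-magnitude outputs. A small self-contained illustration (values chosen only for the example):

    import numpy as np

    t = 0.01
    x = np.array([100.0, 0.500])
    y = np.array([100.5, 0.505])

    print(not np.any(np.abs(x - y) > t))      # False: 0.5 exceeds the absolute threshold
    print(np.allclose(x, y, atol=t, rtol=t))  # True: 0.5 <= atol + rtol * 100.5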
@@ -178,7 +178,12 @@ class TestOperators(hu.HypothesisTestCase):
            return st.sampled_from([np.float32, np.float64])

        _test_binary(
            "Div", ref, filter_=non_zero, test_gradient=True, dtypes=div_dtypes
            "Div", ref, filter_=non_zero, test_gradient=True,
            dtypes=div_dtypes, gcs=hu.gcs_cpu_only
        )(self)
        _test_binary(
            "Div", ref, filter_=non_zero, test_gradient=False,
            dtypes=div_dtypes
        )(self)
        _test_binary_broadcast(
            "Div", ref, filter_=non_zero, dtypes=div_dtypes)(self)

@@ -269,8 +274,8 @@ class TestOperators(hu.HypothesisTestCase):
        for param, _ in enumerate(inputs):
            self.assertGradientChecks(gc, op, inputs, param, [0])

    @unittest.skipIf(True,
                     "Recurrent only works on CUDA 7.5 and above")
    @unittest.skipIf(not workspace.has_gpu_support,
                     "Skipping test due to no gpu present.")
    @given(hidden_size=st.integers(min_value=1, max_value=3),
           num_layers=st.integers(min_value=1, max_value=3),
           bidirectional=st.booleans(),

@@ -371,10 +376,10 @@ class TestOperators(hu.HypothesisTestCase):
           pad_l=st.integers(0, 3),
           pad_b=st.integers(0, 3),
           pad_r=st.integers(0, 3),
           kernel=st.integers(1, 5),
           size=st.integers(7, 10),
           input_channels=st.integers(1, 8),
           output_channels=st.integers(1, 8),
           kernel=st.integers(3, 5),
           size=st.integers(8, 8),
           input_channels=st.integers(1, 3),
           output_channels=st.integers(1, 3),
           batch_size=st.integers(1, 3),
           order=st.sampled_from(["NCHW", "NHWC"]),
           engine=st.sampled_from([""]),

@@ -661,7 +666,8 @@ class TestOperators(hu.HypothesisTestCase):
           output_channels=st.integers(1, 8),
           batch_size=st.integers(1, 3),
           order=st.sampled_from(["NCHW", "NHWC"]),
           engine=st.sampled_from([""]), **hu.gcs)
           engine=st.sampled_from(["", "CUDNN"]), **hu.gcs)
    @settings(max_examples=2, timeout=100)
    def test_convolution_transpose_gradients(self, stride, pad, kernel,
                                             size, input_channels,
                                             output_channels, batch_size,

@@ -698,7 +704,7 @@ class TestOperators(hu.HypothesisTestCase):
           input_channels=st.integers(1, 8),
           output_channels=st.integers(1, 8),
           batch_size=st.integers(1, 3),
           engine=st.sampled_from([""]), **hu.gcs)
           engine=st.sampled_from(["", "CUDNN"]), **hu.gcs)
    def test_convolution_transpose_layout(self, stride, pad, kernel,
                                          size, input_channels,
                                          output_channels, batch_size,

@@ -1049,7 +1055,7 @@ class TestOperators(hu.HypothesisTestCase):
    @given(target_probabilities=hu.arrays(
        dims=[10], elements=st.floats(allow_nan=False,
                                      allow_infinity=False,
                                      min_value=0,
                                      min_value=0.01,
                                      max_value=1)),
           **hu.gcs)
    def test_perplexity(self, target_probabilities, gc, dc):

@@ -1477,9 +1483,13 @@ class TestOperators(hu.HypothesisTestCase):
        op = core.CreateOperator("HasElements", ["data"], ["has_elements"])
        self.assertReferenceChecks(gc, op, [data], lambda x: (len(x) > 0, ))

        op = core.CreateOperator("IsEmpty", ["data"], ["is_empty"])
        self.assertReferenceChecks(gc, op, [data], lambda x: (len(x) == 0, ))

    @given(initial_iters=st.integers(0, 100),
           max_iters=st.integers(0, 100))
    def test_criteria_net_with_execution_step(self, initial_iters, max_iters):
    def test_should_stop_as_criteria_net_execution_step(
            self, initial_iters, max_iters):
        net = core.Net("net")
        net.Iter(["iter"], ["iter"])
        workspace.FeedBlob(
@@ -1487,16 +1497,87 @@ class TestOperators(hu.HypothesisTestCase):
        workspace.FeedBlob(
            "num_iters", np.asarray([max_iters]).astype(np.int32))
        criteria_net = core.Net("criteria")
        criteria_net.LT(["iter", "num_iters"], ["continue"])
        criteria_net.Proto().external_output.extend(["continue"])
        criteria_net.GE(["iter", "num_iters"], ["stop"])
        criteria_net.Proto().external_output.extend(["stop"])

        plan = core.Plan('plan')
        plan.AddStep(core.execution_step('step', net, criteria=criteria_net))
        plan.AddStep(core.execution_step(
            'step', [criteria_net, net],
            should_stop_blob=core.BlobReference("stop")))
        workspace.RunPlan(plan)
        iters = workspace.FetchBlob("iter")
        self.assertEqual(iters.dtype, np.int32)
        self.assertEqual(iters[0], max(initial_iters, max_iters))

    def test_disabled_execution_step(self):
        def createNets(i, disabled):
            should_stop = 'should_stop_{}'.format(i)
            output = 'output_{}'.format(i)

            # init content and stop signal
            init = core.Net("init_{}".format(i))
            init.ConstantFill(
                [],
                [output],
                shape=[1],
                value=0.0
            )
            init.Cast([output], [should_stop], to='bool')

            # decide if disabled or not
            criterion = core.Net("criterion_{}".format(i))
            tmp = criterion.ConstantFill(
                [],
                shape=[1],
                value=1.0 if disabled else 0.0
            )
            criterion.Cast([tmp], [should_stop], to='bool')
            criterion.Proto().external_output.extend([should_stop])

            # the body net is just to turn a 0 blob to 1
            net = core.Net("net_{}".format(i))
            net.ConstantFill(
                [],
                [output],
                shape=[1],
                value=1.0
            )

            # always end the loop
            ender = core.Net("ender_{}".format(i))
            tmp = ender.ConstantFill(
                [],
                shape=[1],
                value=1.0
            )
            ender.Cast([tmp], [should_stop], to='bool')
            ender.Proto().external_output.extend([should_stop])

            return [init, criterion, net, ender]

        nets = [createNets(1, False),
                createNets(2, True),
                createNets(3, False)]
        steps = [
            core.execution_step(
                'step_1', nets[0],
                should_stop_blob=core.BlobReference('should_stop_1')),
            core.execution_step(
                'step_2', nets[1],
                should_stop_blob=core.BlobReference('should_stop_2')),
            core.execution_step('step_3', nets[2])
        ]
        expected = [1.0, 0.0, 1.0]

        plan = core.Plan('plan')
        plan.AddStep(core.execution_step('all_steps', steps, num_iter=3))
        workspace.RunPlan(plan)

        for i, net in enumerate(nets):
            self.assertEqual(
                workspace.FetchBlob('output_{}'.format(i + 1))[0],
                expected[i])

    @given(initial_iters=st.integers(0, 100),
           num_iters=st.integers(0, 100))
    def test_iter_count_with_execution_step(self, initial_iters, num_iters):

@@ -1523,6 +1604,13 @@ class TestOperators(hu.HypothesisTestCase):
    def test_cast(self, a, src, dst, use_name, gc, dc):
        a = a.astype(src)

        # Casting from a float type outside the range of the integral
        # type is UB.
        ftypes = [np.float32, np.float64]
        if src in ftypes and dst not in ftypes and dst is not np.bool:
            info = np.iinfo(dst)
            a = np.clip(a, info.min, info.max)

        def ref(data):
            return [data.astype(dst)]

@@ -1571,7 +1659,8 @@ class TestOperators(hu.HypothesisTestCase):
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])

    @given(X=hu.tensor(), seed=st.integers(min_value=0, max_value=65536),
    @given(X=_dtypes().flatmap(lambda dtype: hu.tensor(dtype=dtype)),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           **hu.gcs)
    def test_transpose(self, X, seed, null_axes, gc, dc):

@@ -1589,6 +1678,7 @@ class TestOperators(hu.HypothesisTestCase):

        self.assertReferenceChecks(gc, op, [X, axes],
                                   transpose_ref)
        if X.dtype != np.int32 and X.dtype != np.int64:
            self.assertGradientChecks(gc, op, [X], 0, [0])

    @given(n=st.integers(1, 3),
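The clipping added in test_cast above guards a real hazard: converting a float that does not fit the target integer type is undefined behavior in C++, and numpy merely reports whatever the hardware does. A quick illustration (values chosen for the example):

    import numpy as np

    f = np.float64(3e9)                    # does not fit in int32
    info = np.iinfo(np.int32)
    safe = np.clip(f, info.min, info.max)  # clipped to 2147483647.0
    print(np.int32(safe))                  # well-defined: 2147483647
    # np.int32(f) without the clip is platform-dependent (C++ UB on conversion)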
@@ -263,7 +263,11 @@ class HypothesisTestCase(test_util.TestCase):
        outs = []
        for (n, ref) in zip(op.output, reference_outputs):
            output = workspace.FetchBlob(n)
            np.testing.assert_allclose(output, ref, atol=1e-4, rtol=1e-4)
            if output.dtype.kind in ('S', 'O'):
                np.testing.assert_array_equal(output, ref)
            else:
                np.testing.assert_allclose(
                    output, ref, atol=1e-4, rtol=1e-4)
            outs.append(output)
        if grad_reference and output_to_grad:
            self._assertGradReferenceChecks(
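The dtype.kind branch is needed because np.testing.assert_allclose only makes sense for numeric arrays; byte-string ('S') and object ('O') arrays must be compared for exact equality instead. A minimal illustration:

    import numpy as np

    a = np.array([b'foo', b'bar'])       # a.dtype.kind == 'S'
    np.testing.assert_array_equal(a, a)  # fine
    # np.testing.assert_allclose(a, a)   # raises: not supported for non-numeric dtypes
    print(a.dtype.kind, np.array([1.0]).dtype.kind)  # 'S' 'f'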
138  caffe2/python/io.py  Normal file
@@ -0,0 +1,138 @@
"""
Defines the base interface for reading and writing operations.

Readers/Writers are objects that produce operations that read/write sequences
of data. Each operation reads or writes a list of BlobReferences.

Readers and Writers must be implemented such that read and write operations
are atomic and thread safe.

Examples of possible Readers and Writers:
    HiveReader, HiveWriter,
    QueueReader, QueueWriter,
    DatasetReader, DatasetWriter,
    DBReader, DBWriter,

See `dataset.py` for an example of implementation.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core


class Reader(object):
    """
    Reader is an abstract class to be implemented in order to provide
    operations capable of iterating through a dataset or stream of data.

    A Reader must implement at least one operation, `read`, which
    adds operations to a net that read the next batch of data. Readers can
    optionally support the `reset` operation, which is useful when multiple
    passes over the data are required.
    """
    def read(self, read_net, batch_size=1, *args):
        """
        Add operations to read_net that will read the next batch of data
        and return a list of BlobReference representing the blobs that will
        contain the batches produced.

        Operations added to `read_net` must be thread safe and atomic, that is,
        it should be possible to clone `read_net` and run multiple instances of
        it in parallel.

        Args:
            read_net: the net that will be appended with read operations
            batch_size: number of entries to read

        Returns:
            A tuple (should_stop, fields), with:

                should_stop: BlobReference pointing to a boolean scalar
                             blob that indicates whether the read operation
                             was successful or whether the end of data has
                             been reached.
                fields: A tuple of BlobReference containing the latest batch
                        of data that was read.
        """
        raise NotImplementedError('Readers must implement `read`.')

    def reset(self, net):
        """Append operations to `net` that will reset the reader.

        This can be used to read the data multiple times.
        Not all readers support this operation.
        """
        raise NotImplementedError('This reader cannot be reset.')

    def execution_step(self, reader_net_name=None, batch_size=1):
        """Create an execution step with a net containing read operators.

        The execution step will contain a `stop_blob` that knows how to stop
        the execution loop when end of data was reached.

        E.g.:

            read_step, fields = reader.execution_step()
            consume_net = core.Net('consume')
            consume_net.Print(fields[0], [])
            p = core.Plan('reader')
            p.AddStep(read_step.AddNet(consume_net))
            core.RunPlan(p)

        Args:

            reader_net_name: (optional) the name of the reader_net to be
                             created. The execution step will
                             be named accordingly.
            batch_size: the batch size

        Returns:
            A tuple (read_step, fields), with:

                read_step: A newly created execution step containing a net with
                           read operations. The step will have `stop_blob` set,
                           in order to stop the loop on end of data.
                fields: A tuple of BlobReference containing the latest batch
                        of data that was read.
        """
        reader_net = core.Net(reader_net_name or 'reader')
        should_stop, fields = self.read(reader_net, batch_size=batch_size)
        read_step = core.execution_step(
            '{}_step'.format(reader_net_name),
            reader_net,
            should_stop_blob=should_stop)
        return (read_step, fields)


class Writer(object):
    """
    Writer is an abstract class to be implemented in order to provide
    operations capable of feeding a data stream or a dataset.

    A Writer must implement 2 operations:
    `write`, which adds operations to a net that write the next batch of
    data, and `commit`, which adds operations to a net in order to indicate
    that no more data will be written.
    """

    def write(self, writer_net, fields):
        """Add operations to `writer_net` that write the next batch of data.

        Operations added to the net must be thread-safe and unique, that is:
        multiple writers must be able to write to the dataset in parallel.

        Args:
            fields: a tuple of BlobReference containing the batch of data to
                    write.
        """
        raise NotImplementedError('Writers must implement write.')

    def commit(self, finish_net):
        """Add operations to `finish_net` that signal end of data.

        This must be implemented by all Writers, but may be no-op for some
        of them.
        """
        raise NotImplementedError('Writers must implement commit.')
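To give a rough feel for the Reader contract, here is a hypothetical subclass (not part of this commit) that yields the same single-field batch forever; the ConstantFill/Cast idiom for producing a boolean stop blob is borrowed from the execution-step tests elsewhere in this commit:

    from caffe2.python import core
    from caffe2.python.io import Reader

    class ConstantReader(Reader):
        """Hypothetical Reader: emits a constant batch and never stops."""
        def __init__(self, value):
            self._value = value

        def read(self, read_net, batch_size=1, *args):
            data = read_net.ConstantFill(
                [], ['const_data'], shape=[batch_size], value=self._value)
            zero = read_net.ConstantFill([], ['zero'], shape=[1], value=0.0)
            should_stop = read_net.Cast([zero], ['should_stop'], to='bool')
            return should_stop, [data]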
@@ -116,7 +116,7 @@ class TestMiniAlexNet(test_util.TestCase):
        gpu_device = caffe2_pb2.DeviceOption()
        gpu_device.device_type = caffe2_pb2.CUDA

        checker = device_checker.DeviceChecker(1e-2, [cpu_device, gpu_device])
        checker = device_checker.DeviceChecker(0.05, [cpu_device, gpu_device])
        ret = checker.CheckNet(
            model.net.Proto(),
            inputs,

@@ -126,15 +126,16 @@ class TestMiniAlexNet(test_util.TestCase):
        )
        self.assertEqual(ret, True)

    def testMiniAlexNet(self):
    @unittest.skipIf(not workspace.has_gpu_support,
                     "No GPU support. Skipping test.")
    def testMiniAlexNetNCHW(self):
        self._testMiniAlexNet("NCHW")

    @unittest.skipIf(not workspace.has_gpu_support,
                     "No GPU support. Skipping test.")
    def testMiniAlexNetNHWC(self):
        self._testMiniAlexNet("NHWC")


if __name__ == '__main__':
    if not workspace.has_gpu_support:
        print('No GPU support. Skipping gpu test.')
    elif workspace.NumCudaDevices() == 0:
        print('No GPU device. Skipping gpu test.')
    else:
        unittest.main()
@@ -3,6 +3,7 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
from collections import defaultdict
from caffe2.python import utils

@@ -53,6 +54,11 @@ def _rectify_operator_and_name(operators_or_net, name):
    return operators, name


def _escape_label(name):
    # json.dumps is poor man's escaping
    return json.dumps(name)


def GetPydotGraph(operators_or_net, name=None, rankdir='LR'):
    operators, name = _rectify_operator_and_name(operators_or_net, name)
    graph = pydot.Dot(name, rankdir=rankdir)

@@ -73,7 +79,7 @@ def GetPydotGraph(operators_or_net, name=None, rankdir='LR'):
            if input_name not in pydot_nodes:
                input_node = pydot.Node(
                    input_name + str(pydot_node_counts[input_name]),
                    label=input_name,
                    label=_escape_label(input_name),
                    **BLOB_STYLE
                )
            pydot_nodes[input_name] = input_node

@@ -87,7 +93,7 @@ def GetPydotGraph(operators_or_net, name=None, rankdir='LR'):
                pydot_node_counts[output_name] += 1
            output_node = pydot.Node(
                output_name + str(pydot_node_counts[output_name]),
                label=output_name,
                label=_escape_label(output_name),
                **BLOB_STYLE
            )
            pydot_nodes[output_name] = output_node
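json.dumps works as a label escaper here because it wraps the string in double quotes and backslash-escapes any quotes inside it, which is what pydot/graphviz labels need. For instance:

    import json

    print(json.dumps('blob'))          # "blob"
    print(json.dumps('conv1/w "x"'))   # "conv1/w \"x\""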
43  caffe2/python/operator_test/atomic_ops_test.py  Normal file
@@ -0,0 +1,43 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase


class TestAtomicOps(TestCase):
    def test_atomic_ops(self):
        """
        Test that both countdown and checksum are updated atomically, by
        having the countdown count from 20k to 0 across parallel workers
        and adding each fetched value to the checksum. If operations are
        truly atomic, each value from 1 to 20k should be fetched exactly
        once from the countdown, and fed exactly once to the checksum,
        such that at the end the checksum must contain the exact value of
        sum[i=1..20000](i).
        """
        init_net = core.Net('init')
        mutex_countdown = init_net.CreateMutex([])
        mutex_checksum = init_net.CreateMutex([])
        countdown = init_net.ConstantIntFill([], shape=[], value=20000.)
        checksum = init_net.ConstantIntFill([], shape=[], value=0.)
        minus_one = init_net.ConstantIntFill([], shape=[], value=-1.)
        steps = []
        for i in range(0, 100):
            net = core.Net('net:%d' % i)
            _, fetched_count = net.AtomicFetchAdd(
                [mutex_countdown, countdown, minus_one],
                [countdown, 'fetched_count:%d' % i])
            net.AtomicFetchAdd(
                [mutex_checksum, checksum, fetched_count],
                [checksum, 'not_used'])
            steps.append(
                core.execution_step('worker:%d' % i, net, num_iter=200))
        super_step = core.execution_step(
            'parent', steps, concurrent_substeps=True)
        plan = core.Plan('plan')
        plan.AddStep(core.execution_step('init', init_net))
        plan.AddStep(super_step)
        workspace.RunPlan(plan)
        # checksum = sum[i=1..20000](i) = 20000 * 20001 / 2 = 200010000
        self.assertEquals(workspace.FetchBlob(checksum), 200010000)
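The expected checksum is just the closed form of the arithmetic series; a one-liner confirms the constant used in the assertion:

    # sum of 1..n is n * (n + 1) // 2
    n = 20000
    assert n * (n + 1) // 2 == 200010000
    assert sum(range(1, n + 1)) == 200010000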
42  caffe2/python/operator_test/counter_ops_test.py  Normal file
@@ -0,0 +1,42 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase


class TestCounterOps(TestCase):
    def test_counter_ops(self):
        workspace.RunOperatorOnce(core.CreateOperator(
            'CreateCounter', [], ['c'], init_count=1))
        workspace.RunOperatorOnce(core.CreateOperator(
            'CountDown', ['c'], ['t1']))  # 1 -> 0
        assert not workspace.FetchBlob('t1')

        workspace.RunOperatorOnce(core.CreateOperator(
            'CountDown', ['c'], ['t2']))  # 0 -> 0
        assert workspace.FetchBlob('t2')

        workspace.RunOperatorOnce(core.CreateOperator(
            'ResetCounter', ['c'], [], init_count=1))  # -> 1
        workspace.RunOperatorOnce(core.CreateOperator(
            'CountDown', ['c'], ['t3']))  # 1 -> 0
        assert not workspace.FetchBlob('t3')

        workspace.RunOperatorOnce(core.CreateOperator(
            'ConstantBoolFill', [], ['t4'], value=0.0, shape=[]))
        assert workspace.FetchBlob('t4') == workspace.FetchBlob('t1')

        workspace.RunOperatorOnce(core.CreateOperator(
            'ConstantBoolFill', [], ['t5'], value=1.0, shape=[]))
        assert workspace.FetchBlob('t5') == workspace.FetchBlob('t2')

        assert workspace.RunOperatorOnce(core.CreateOperator(
            'And', ['t1', 't2'], ['t6']))
        assert not workspace.FetchBlob('t6')  # True && False

        assert workspace.RunOperatorOnce(core.CreateOperator(
            'And', ['t2', 't5'], ['t7']))
        assert workspace.FetchBlob('t7')  # True && True
71  caffe2/python/operator_test/cross_entropy_ops_test.py  Normal file
@@ -0,0 +1,71 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def sigmoid_cross_entropy_with_logits(x, z):
    return np.maximum(x, 0) - x * z + np.log(1 + np.exp(-np.abs(x)))


def sigmoid_cross_entropy_with_logits_grad(x, z):
    return z - sigmoid(x)


class TestCrossEntropyOps(hu.HypothesisTestCase):
    @given(
        inputs=st.lists(
            elements=st.integers(min_value=1, max_value=5),
            min_size=1,
            max_size=2,
            average_size=2,
        ).flatmap(
            lambda shape: st.tuples(
                hu.arrays(
                    dims=shape,
                    elements=st.one_of(
                        st.floats(min_value=-1.0, max_value=-0.1),
                        st.floats(min_value=0.1, max_value=1.0),
                    )),
                hu.arrays(
                    dims=shape,
                    elements=st.sampled_from([0.0, 1.0]),
                ),
            )
        ),
    )
    def test_sigmoid_cross_entropy_with_logits(self, inputs):
        logits, targets = inputs

        def sigmoid_xentr_logit_ref(logits, targets):
            s = sigmoid_cross_entropy_with_logits(logits, targets)
            m = np.mean(s, axis=len(logits.shape) - 1)
            return (m, )

        def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs):
            fwd_logits, fwd_targets = fwd_inputs
            inner_size = fwd_logits.shape[-1]
            m = fwd_targets - sigmoid(fwd_logits)
            g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size
            return (g_in, None)

        op = core.CreateOperator(
            'SigmoidCrossEntropyWithLogits',
            ['logits', 'targets'],
            ['xentropy'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [logits, targets],
            sigmoid_xentr_logit_ref,
            output_to_grad='xentropy',
            grad_reference=sigmoid_xentr_logit_grad_ref)
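The reference above uses the standard numerically stable rewriting of -z*log(sigmoid(x)) - (1-z)*log(1-sigmoid(x)) as max(x, 0) - x*z + log(1 + exp(-|x|)). A quick numeric sanity check on moderate values (where the naive form is still finite) shows the two agree:

    import numpy as np

    def naive(x, z):
        s = 1.0 / (1.0 + np.exp(-x))
        return -z * np.log(s) - (1 - z) * np.log(1 - s)

    def stable(x, z):
        return np.maximum(x, 0) - x * z + np.log(1 + np.exp(-np.abs(x)))

    x, z = np.array([-3.0, 0.5, 4.0]), np.array([1.0, 0.0, 1.0])
    assert np.allclose(naive(x, z), stable(x, z))
    # for large |x| the naive form overflows or loses precision; the stable one does not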
284  caffe2/python/operator_test/dataset_ops_test.py  Normal file
@@ -0,0 +1,284 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from caffe2.python import core, workspace, dataset
from caffe2.python.dataset import Const
from caffe2.python.schema import List, Struct, Scalar, Map
from caffe2.python.test_util import TestCase


def _assert_arrays_equal(actual, ref, err_msg):
    if ref.dtype.kind in ('S', 'O'):
        np.testing.assert_array_equal(actual, ref, err_msg=err_msg)
    else:
        np.testing.assert_allclose(
            actual, ref, atol=1e-4, rtol=1e-4, err_msg=err_msg)


class TestDatasetOps(TestCase):
    def test_dataset_ops(self):
        """
        1. Defining the schema of our dataset.

        This example schema could represent, for example, a search query log.
        """
        schema = Struct(
            # fixed size vector, which will be stored as a matrix when batched
            ('dense', Scalar((np.float32, 3))),
            # could represent a feature map from feature ID to float value
            ('floats', Map(
                Scalar(np.int32),
                Scalar(np.float32))),
            # could represent a multi-valued categorical feature map
            ('int_lists', Map(
                Scalar(np.int32),
                List(Scalar(np.int64)),
            )),
            # could represent a multi-valued, weighted categorical feature map
            ('id_score_pairs', Map(
                Scalar(np.int32),
                Map(
                    Scalar(np.int64),
                    Scalar(np.float32),
                    keys_name='ids',
                    values_name='scores'),
            )),
            # additional scalar information
            ('metadata', Struct(
                ('user_id', Scalar(np.int64)),
                ('user_embed', Scalar((np.float32, 2))),
                ('query', Scalar(str)),
            )),
        )

        """
        This is what the flattened fields for this schema look like, along
        with their types. Each one of these fields will be stored, read and
        written as a tensor.
        """
        expected_fields = [
            ('dense', (np.float32, 3)),
            ('floats:lengths', np.int32),
            ('floats:values:keys', np.int32),
            ('floats:values:values', np.float32),
            ('int_lists:lengths', np.int32),
            ('int_lists:values:keys', np.int32),
            ('int_lists:values:values:lengths', np.int32),
            ('int_lists:values:values:values', np.int64),
            ('id_score_pairs:lengths', np.int32),
            ('id_score_pairs:values:keys', np.int32),
            ('id_score_pairs:values:values:lengths', np.int32),
            ('id_score_pairs:values:values:values:ids', np.int64),
            ('id_score_pairs:values:values:values:scores', np.float32),
            ('metadata:user_id', np.int64),
            ('metadata:user_embed', (np.float32, 2)),
            ('metadata:query', str),
        ]
        zipped = zip(
            expected_fields,
            schema.field_names(),
            schema.field_types())
        for (ref_name, ref_type), name, dtype in zipped:
            self.assertEquals(ref_name, name)
            self.assertEquals(np.dtype(ref_type), dtype)

        """
        2. The contents of our dataset.

        Contents as defined below could represent, for example, a log of
        search queries along with dense, sparse features and metadata.
        The dataset below has 3 top-level entries.
        """
        contents_raw = [
            # dense
            [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3], [3.1, 3.2, 3.3]],
            # floats
            [1, 2, 3],  # len
            [11, 21, 22, 31, 32, 33],  # key
            [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
            # int lists
            [2, 0, 2],  # len
            [11, 12, 31, 32],  # key
            [2, 4, 3, 1],  # value:len
            [111, 112, 121, 122, 123, 124, 311, 312, 313, 321],  # value:value
            # id score pairs
            [1, 2, 2],  # len
            [11, 21, 22, 31, 32],  # key
            [1, 1, 2, 2, 3],  # value:len
            [111, 211, 221, 222, 311, 312, 321, 322, 323],  # value:ids
            [11.1, 21.1, 22.1, 22.2, 31.1, 31.2, 32.1, 32.2, 32.3],  # val:score
            # metadata
            [123, 234, 456],  # user_id
            [[0.2, 0.8], [0.5, 0.5], [0.7, 0.3]],  # user_embed
            ['dog posts', 'friends who like to', 'posts about ca'],  # query
        ]
        # convert the above content to ndarrays, checking against the schema
        contents = dataset.to_ndarray_list(contents_raw, schema)

        """
        3. Creating and appending to the dataset.
        We first create an empty dataset with the given schema.
        Then, a Writer is used to append these entries to the dataset.
        """
        ds = dataset.Dataset(schema)
        net = core.Net('init')
        ds.init_empty(net)

        blobs_to_append = [Const(net, c) for c in contents]
        writer = ds.writer(init_net=net)
        writer.write(net, blobs_to_append)
        workspace.RunNetOnce(net)

        """
        4. Iterating through the dataset contents.

        If we were to iterate through the top level entries of our dataset,
        this is what we should expect to see:
        """
        entries_raw = [
            (
                [[1.1, 1.2, 1.3]],  # dense
                [1], [11], [1.1],  # floats
                [2], [11, 12], [2, 4], [111, 112, 121, 122, 123, 124],  # intlst
                [1], [11], [1], [111], [11.1],  # id score pairs
                [123], [[0.2, 0.8]], ['dog posts'],  # metadata
            ),
            (
                [[2.1, 2.2, 2.3]],  # dense
                [2], [21, 22], [2.1, 2.2],  # floats
                [0], [], [], [],  # int list
                [2], [21, 22], [1, 2], [211, 221, 222], [21.1, 22.1, 22.2],
                [234], [[0.5, 0.5]], ['friends who like to'],  # metadata
            ),
            (
                [[3.1, 3.2, 3.3]],  # dense
                [3], [31, 32, 33], [3.1, 3.2, 3.3],  # floats
                [2], [31, 32], [3, 1], [311, 312, 313, 321],  # int lst
                [2], [31, 32], [2, 3], [311, 312, 321, 322, 323],
                [31.1, 31.2, 32.1, 32.2, 32.3],  # id score list
                [456], [[0.7, 0.3]], ['posts about ca'],  # metadata
            ),
            # after the end of the dataset, we will keep getting empty vectors
            ([],) * 16,
            ([],) * 16,
        ]
        entries = [dataset.to_ndarray_list(e, schema) for e in entries_raw]
"""
|
||||
Let's go ahead and create the reading nets.
|
||||
We will run `read` net multiple times and assert that we are reading the
|
||||
entries the way we stated above.
|
||||
"""
|
||||
read_init_net = core.Net('read_init')
|
||||
read_next_net = core.Net('read_next')
|
||||
reader = ds.reader(read_init_net)
|
||||
should_continue, batch_blobs = reader.read(read_next_net)
|
||||
|
||||
workspace.RunNetOnce(read_init_net)
|
||||
|
||||
workspace.CreateNet(read_next_net)
|
||||
read_next_net_name = str(read_next_net)
|
||||
|
||||
for i, entry in enumerate(entries):
|
||||
workspace.RunNet(read_next_net_name)
|
||||
for name, blob, base in zip(ds.field_names(), batch_blobs, entry):
|
||||
data = workspace.FetchBlob(str(blob))
|
||||
_assert_arrays_equal(
|
||||
data, base,
|
||||
err_msg='Mismatch in entry %d, field %s' % (i, name))
|
||||
|
||||
"""
|
||||
5. Reading/writing in a single plan
|
||||
|
||||
If all of operations on the data are expressible as Caffe2 operators,
|
||||
we don't need to load the data to python, iterating through the dataset
|
||||
in a single Plan.
|
||||
|
||||
Where we will process the dataset a little and store it in a second
|
||||
dataset. We can reuse the same Reader since it supports reset.
|
||||
"""
|
||||
reset_net = core.Net('reset_net')
|
||||
reader.reset(reset_net)
|
||||
read_step, fields = reader.execution_step()
|
||||
|
||||
""" We will add the line number * 1000 to the feature ids. """
|
||||
process_net = core.Net('process')
|
||||
line_no = Const(process_net, 0, dtype=np.int32)
|
||||
const_one = Const(process_net, 1000, dtype=np.int32)
|
||||
process_net.Add([line_no, const_one], [line_no])
|
||||
fid = schema.floats.values.keys.id()
|
||||
process_net.Print(fields[fid], [])
|
||||
process_net.Add([fields[fid], line_no], fields[fid], broadcast=1)
|
||||
|
||||
""" Lets create a second dataset and append to it. """
|
||||
ds2 = dataset.Dataset(schema, name='dataset2')
|
||||
ds2.init_empty(reset_net)
|
||||
writer = ds2.writer(reset_net)
|
||||
writer.write(process_net, fields)
|
||||
# commit is not necessary for DatasetWriter but will add it for
|
||||
# generality of the example
|
||||
commit_net = core.Net('commit')
|
||||
writer.commit(commit_net)
|
||||
|
||||
""" Time to create and run a plan which will do the processing """
|
||||
plan = core.Plan('process')
|
||||
plan.AddStep(core.execution_step('reset', reset_net))
|
||||
plan.AddStep(read_step.AddNet(process_net))
|
||||
plan.AddStep(core.execution_step('commit', commit_net))
|
||||
workspace.RunPlan(plan)
|
||||
|
||||
"""
|
||||
Now we should have dataset2 populated.
|
||||
"""
|
||||
ds2blobs = ds2.get_blobs()
|
||||
for i, (name, blob) in enumerate(zip(schema.field_names(), ds2blobs)):
|
||||
data = workspace.FetchBlob(str(blob))
|
||||
content = contents[i]
|
||||
if i == fid:
|
||||
# one of our fields has been added with line numbers * 1000
|
||||
content += [1000, 2000, 2000, 3000, 3000, 3000]
|
||||
_assert_arrays_equal(
|
||||
data, contents[i], err_msg='Mismatch in field %s.' % name)
|
||||
|
||||
"""
|
||||
6. Slicing a dataset
|
||||
|
||||
You can create a new schema from pieces of another schema and reuse
|
||||
the same data.
|
||||
"""
|
||||
subschema = Struct(('top_level', schema.int_lists.values))
|
||||
int_list_contents = contents[schema.int_lists.values.slice()]
|
||||
self.assertEquals(len(subschema.field_names()), len(int_list_contents))
|
||||
|
||||
"""
|
||||
7. Random Access a dataset
|
||||
|
||||
"""
|
||||
read_init_net = core.Net('read_init')
|
||||
read_next_net = core.Net('read_next')
|
||||
|
||||
idx = np.array([2, 1, 0])
|
||||
workspace.FeedBlob('idx', idx)
|
||||
|
||||
reader = ds.random_reader(read_init_net, 'idx')
|
||||
reader.computeoffset(read_init_net)
|
||||
|
||||
should_continue, batch_blobs = reader.read(read_next_net)
|
||||
|
||||
workspace.CreateNet(read_init_net)
|
||||
workspace.RunNetOnce(read_init_net)
|
||||
|
||||
workspace.CreateNet(read_next_net)
|
||||
read_next_net_name = str(read_next_net)
|
||||
|
||||
for i in range(len(entries)):
|
||||
k = idx[i] if i in idx else i
|
||||
entry = entries[k]
|
||||
workspace.RunNet(read_next_net_name)
|
||||
for name, blob, base in zip(ds.field_names(), batch_blobs, entry):
|
||||
data = workspace.FetchBlob(str(blob))
|
||||
_assert_arrays_equal(
|
||||
data, base,
|
||||
err_msg='Mismatch in entry %d, field %s' % (i, name))
|
||||
|
|
@@ -2,9 +2,9 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase
import numpy as np


class TestIndexOps(TestCase):

@@ -47,6 +47,13 @@ class TestIndexOps(TestCase):
        result2 = workspace.FetchBlob('result2')
        np.testing.assert_array_equal([0, 5, 1, 0, 0], result2)

        workspace.RunOperatorOnce(core.CreateOperator(
            'IndexSize',
            ['index'],
            ['index_size']))
        size = workspace.FetchBlob('index_size')
        self.assertEquals(size, 6)

        workspace.RunOperatorOnce(core.CreateOperator(
            'IndexStore',
            ['index'],

@@ -55,3 +62,21 @@ class TestIndexOps(TestCase):
        new_entries = np.array(['new_entry1', 'new_entry2'], dtype=str)
        np.testing.assert_array_equal(
            np.concatenate((entries, new_entries)), stored_actual)

        workspace.RunOperatorOnce(core.CreateOperator(
            'StringIndexCreate',
            [],
            ['index2']))

        workspace.RunOperatorOnce(core.CreateOperator(
            'IndexLoad',
            ['index2', 'stored_entries'],
            [],
            skip_first_entry=1))

        workspace.RunOperatorOnce(core.CreateOperator(
            'IndexSize',
            ['index2'],
            ['index2_size']))
        index2_size = workspace.FetchBlob('index2_size')
        self.assertEquals(index2_size, 5)
76  caffe2/python/operator_test/one_hot_ops_test.py  Normal file
@@ -0,0 +1,76 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np


def _one_hots():
    index_size = st.integers(min_value=1, max_value=5)
    lengths = st.lists(
        elements=st.integers(min_value=0, max_value=5))
    return st.tuples(index_size, lengths).flatmap(
        lambda x: st.tuples(
            st.just(x[0]),
            st.just(x[1]),
            st.lists(
                elements=st.integers(min_value=0, max_value=x[0] - 1),
                min_size=sum(x[1]),
                max_size=sum(x[1]))))


class TestOneHotOps(hu.HypothesisTestCase):
    @given(
        hot_indices=hu.tensor(
            min_dim=1, max_dim=1, dtype=np.int64,
            elements=st.integers(min_value=0, max_value=42)),
        end_padding=st.integers(min_value=0, max_value=2))
    def test_one_hot(self, hot_indices, end_padding):

        def one_hot_ref(hot_indices, size):
            out = np.zeros([len(hot_indices), size], dtype=float)
            for i, x in enumerate(hot_indices):
                out[i, x] = 1.
            return (out, )

        size = np.array(max(hot_indices) + end_padding + 1, dtype=np.int64)
        if size == 0:
            size = 1
        op = core.CreateOperator('OneHot', ['hot_indices', 'size'], ['output'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [hot_indices, size],
            one_hot_ref)

    @given(hot_indices=_one_hots())
    def test_segment_one_hot(self, hot_indices):
        index_size, lengths, indices = hot_indices

        index_size = np.array(index_size, dtype=np.int64)
        lengths = np.array(lengths, dtype=np.int32)
        indices = np.array(indices, dtype=np.int64)

        def segment_one_hot_ref(lengths, hot_indices, size):
            offset = 0
            out = np.zeros([len(lengths), size], dtype=float)
            for i, length in enumerate(lengths):
                for idx in hot_indices[offset:offset + length]:
                    out[i, idx] = 1.
                offset += length
            return (out, )

        op = core.CreateOperator(
            'SegmentOneHot',
            ['lengths', 'hot_indices', 'size'],
            ['output'])
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [lengths, indices, index_size],
            segment_one_hot_ref)
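To make the segment variant concrete, here is the reference logic on a tiny hand-checked input: lengths [2, 1] split hot_indices [0, 2, 1] into segments [0, 2] and [1], so each row marks the indices present in its segment:

    import numpy as np

    lengths = np.array([2, 1])
    hot_indices = np.array([0, 2, 1])
    size = 3

    out = np.zeros([len(lengths), size])
    offset = 0
    for i, length in enumerate(lengths):
        for idx in hot_indices[offset:offset + length]:
            out[i, idx] = 1.
        offset += length
    print(out)  # [[1. 0. 1.], [0. 1. 0.]]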
@@ -83,6 +83,10 @@ def logsumexp_grad(grad_out, outputs, inputs):
                    axis=0) * np.exp(inputs[0])


def logmeanexp(x):
    return np.log(np.mean(np.exp(x), axis=0))


def mean(x):
    return np.mean(x, axis=0)


@@ -94,6 +98,30 @@ def mean_grad(grad_out, outputs, inputs):
                     axis=0)


def max(x):
    return np.amax(x, axis=0)


def max_grad(grad_out, outputs, inputs):
    flat_inputs = inputs[0].flatten()
    flat_outputs = np.array(outputs[0]).flatten()
    flat_grad_in = np.zeros(flat_inputs.shape)
    flat_grad_out = np.array(grad_out).flatten()
    blocks = inputs[0].shape[0]
    block_size = flat_inputs.shape[0] // blocks

    for i in range(block_size):
        out_grad = flat_grad_out[i]
        out = flat_outputs[i]
        for j in range(blocks):
            idx = j * block_size + i
            if out == flat_inputs[idx]:
                flat_grad_in[idx] = out_grad
                break

    return np.resize(flat_grad_in, inputs[0].shape)


REFERENCES_ALL = [
    ('Sum', partial(np.sum, axis=0), sum_grad),
]

@@ -101,7 +129,10 @@ REFERENCES_ALL = [
REFERENCES_SORTED = [
    ('RangeSum', partial(np.sum, axis=0), sum_grad),
    ('RangeLogSumExp', logsumexp, logsumexp_grad),
    # gradient is the same as sum
    ('RangeLogMeanExp', logmeanexp, logsumexp_grad),
    ('RangeMean', mean, mean_grad),
    ('RangeMax', max, max_grad),
]
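max_grad routes each output gradient to the first input element that attains the maximum and gives all others zero, matching the usual subgradient convention for ties. A small hand-checked run of the same routing logic:

    import numpy as np

    x = np.array([[1., 5.], [5., 2.]])   # two blocks of size 2
    out = np.amax(x, axis=0)             # [5., 5.]
    g_out = np.array([10., 20.])

    g_in = np.zeros_like(x)
    for i in range(x.shape[1]):
        for j in range(x.shape[0]):      # first block that matches wins
            if x[j, i] == out[i]:
                g_in[j, i] = g_out[i]
                break
    print(g_in)  # [[ 0. 20.], [10.  0.]]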
@@ -185,3 +185,37 @@ class TestSequenceOps(hu.HypothesisTestCase):
            op,
            [padded_data, padded_lengths],
            partial(_gather_padding_ref, start_pad_width, end_pad_width))

    @given(data=hu.tensor(min_dim=3, max_dim=3, dtype=np.float32,
                          elements=st.floats(min_value=-np.inf,
                                             max_value=np.inf),
                          min_value=1, max_value=10),
           **hu.gcs_cpu_only)
    def test_reverse_packed_segs(self, data, gc, dc):
        max_length = data.shape[0]
        batch_size = data.shape[1]
        lengths = np.random.randint(max_length + 1, size=batch_size)

        op = core.CreateOperator(
            "ReversePackedSegs",
            ["data", "lengths"],
            ["reversed_data"])

        def op_ref(data, lengths):
            rev_data = np.array(data, copy=True)
            for i in range(batch_size):
                seg_length = lengths[i]
                for j in range(seg_length):
                    rev_data[j][i] = data[seg_length - 1 - j][i]
            return (rev_data,)

        def op_grad_ref(grad_out, outputs, inputs):
            return op_ref(grad_out, inputs[1]) + (None,)

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[data, lengths],
            reference=op_ref,
            output_to_grad='reversed_data',
            grad_reference=op_grad_ref)
106  caffe2/python/operator_test/string_ops_test.py  Normal file
@@ -0,0 +1,106 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np


def _string_lists(alphabet=None):
    return st.lists(
        elements=st.text(alphabet=alphabet, average_size=3),
        min_size=0,
        max_size=3)


class TestStringOps(hu.HypothesisTestCase):
    @given(strings=_string_lists())
    def test_string_prefix(self, strings):
        length = 3
        # although we are utf-8 encoding below to avoid python exceptions,
        # StringPrefix op deals with byte-length prefixes, which may produce
        # an invalid utf-8 string. The goal here is just to avoid python
        # complaining about the unicode -> str conversion.
        strings = np.array(
            map(lambda a: a.encode('utf-8'), strings), dtype=np.object)

        def string_prefix_ref(strings):
            return (
                np.array(map(lambda a: a[:length], strings), dtype=object), )

        op = core.CreateOperator(
            'StringPrefix',
            ['strings'],
            ['stripped'],
            length=length)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_prefix_ref)

    @given(strings=_string_lists())
    def test_string_suffix(self, strings):
        length = 3
        strings = np.array(
            map(lambda a: a.encode('utf-8'), strings), dtype=np.object)

        def string_suffix_ref(strings):
            return (
                np.array(map(lambda a: a[-length:], strings), dtype=object), )

        op = core.CreateOperator(
            'StringSuffix',
            ['strings'],
            ['stripped'],
            length=length)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_suffix_ref)

    @given(strings=st.text(alphabet=['a', 'b'], average_size=3))
    def test_string_starts_with(self, strings):
        prefix = 'a'
        strings = np.array(
            map(lambda a: str(a), strings), dtype=np.object)

        def string_starts_with_ref(strings):
            return (np.array(
                map(lambda a: a.startswith(prefix), strings), dtype=bool), )

        op = core.CreateOperator(
            'StringStartsWith',
            ['strings'],
            ['bools'],
            prefix=prefix)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_starts_with_ref)

    @given(strings=st.text(alphabet=['a', 'b'], average_size=3))
    def test_string_ends_with(self, strings):
        suffix = 'a'
        strings = np.array(
            map(lambda a: str(a), strings), dtype=np.object)

        def string_ends_with_ref(strings):
            return (np.array(
                map(lambda a: a.endswith(suffix), strings), dtype=bool), )

        op = core.CreateOperator(
            'StringEndsWith',
            ['strings'],
            ['bools'],
            suffix=suffix)
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            [strings],
            string_ends_with_ref)
348  caffe2/python/schema.py  Normal file
@@ -0,0 +1,348 @@
"""
Defines a minimal set of data types that allow to represent datasets with
arbitrary nested structure, including objects of variable length, such as
maps and lists.

This defines a columnar storage format for such datasets on top of caffe2
tensors. In terms of capacity of representation, it can represent most of
the data types supported by the Parquet, ORC and DWRF file formats.

See comments in operator_test/dataset_ops_test.py for an example and
walkthrough on how to use schema to store and iterate through a structured
in-memory dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import OrderedDict
import logging
import numpy as np

logger = logging.getLogger(__name__)


def _join_field_name(prefix, suffix):
    if prefix and suffix:
        return '{}:{}'.format(prefix, suffix)
    elif prefix:
        return prefix
    elif suffix:
        return suffix
    else:
        return ''


class Field(object):
    """Represents an abstract field type in a dataset.
    """
    def __init__(self, children):
        """Derived classes must call this after their initialization."""
        self._parent = (None, 0)
        offset = 0
        self._field_offsets = []
        for child in children:
            self._field_offsets.append(offset)
            offset += len(child.field_names())
        self._field_offsets.append(offset)

    def field_names(self):
        """Return the children field names for this field."""
        raise NotImplementedError('Field is an abstract class.')

    def field_types(self):
        """Return the numpy.dtype for each of the children fields."""
        raise NotImplementedError('Field is an abstract class.')

    def clone(self):
        """Clone this Field along with its children."""
        raise NotImplementedError('Field is an abstract class.')

    def _set_parent(self, parent, relative_id):
        self._parent = (parent, relative_id)

    def slice(self):
        """
        Returns a slice representing the range of field ids that belong to
        this field. This slice can be used to index a list of fields.

        E.g.:

            >>> s = Struct(
            >>>     ('a', Scalar()),
            >>>     ('b', Struct(
            >>>         ('b1', Scalar()),
            >>>         ('b2', Scalar()),
            >>>     )),
            >>>     ('c', Scalar()),
            >>> )
            >>> field_data = ['da', 'db1', 'db2', 'dc']
            >>> field_data[s.b.slice()]
            ['db1', 'db2']
        """
        base_id = self._child_base_id()
        return slice(base_id, base_id + len(self.field_names()))

    def _child_base_id(self, child_index=None):
        """Get the base id of the given child."""
        p, i = self._parent
        pos = 0 if child_index is None else self._field_offsets[child_index]
        if p:
            pos += p._child_base_id(i)
        return pos

    def __eq__(self, other):
        """Equivalence of two schemas."""
        return ((self.field_names() == other.field_names()) and
                (self.field_types() == other.field_types()))


class List(Field):
    """Represents a variable-length list.

    Values of a list can also be complex fields such as Lists and Structs.
    In addition to the fields exposed by its `values` field, a List exposes an
    additional `lengths` field, which will contain the size of each list under
    the parent domain.
    """
    def __init__(self, values):
        assert isinstance(values, Field)
        self.lengths = Scalar(np.int32)
        self.values = values.clone()
        self.lengths._set_parent(self, 0)
        self.values._set_parent(self, 1)
        Field.__init__(self, [self.lengths, self.values])

    def field_names(self):
        value_fields = self.values.field_names()
        return (
            ['lengths'] +
            [_join_field_name('values', v) for v in value_fields])

    def field_types(self):
        return self.lengths.field_types() + self.values.field_types()

    def clone(self):
        return List(self.values)


class Struct(Field):
    """Represents a named list of fields sharing the same domain.
    """
    def __init__(self, *fields):
        for field in fields:
            assert len(field) == 2
            assert field[0], 'Field names cannot be empty'
            assert field[0] != 'lengths', (
                'Struct cannot contain a field named `lengths`.')
            assert isinstance(field[1], Field)
        fields = [(name, field.clone()) for name, field in fields]
        for id, (name, field) in enumerate(fields):
            field._set_parent(self, id)
        self.fields = OrderedDict(fields)
        Field.__init__(self, self.fields.values())

    def field_names(self):
        names = []
        for name, field in self.fields.items():
            names += [_join_field_name(name, f) for f in field.field_names()]
        return names

    def field_types(self):
        types = []
        for name, field in self.fields.items():
            types += field.field_types()
        return types

    def clone(self):
        return Struct(*self.fields.items())

    def __getattr__(self, item):
        return self.fields[item]
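The offset bookkeeping in Field._child_base_id is what makes slice() work on nested structs: every leaf gets a global position in the flattened field list. Reusing the docstring's example (this assumes schema.py is importable as caffe2.python.schema):

    from caffe2.python.schema import Struct, Scalar

    s = Struct(
        ('a', Scalar()),
        ('b', Struct(('b1', Scalar()), ('b2', Scalar()))),
        ('c', Scalar()),
    )
    field_data = ['da', 'db1', 'db2', 'dc']
    print(s.b.slice())              # slice(1, 3)
    print(field_data[s.b.slice()])  # ['db1', 'db2']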
class Scalar(Field):
|
||||
"""Represents a typed scalar or tensor of fixed shape.
|
||||
|
||||
A Scalar is a leaf in a schema tree, translating to exactly one tensor in
|
||||
the dataset's underlying storage.
|
||||
|
||||
Usually, the tensor storing the actual values of this field is a 1D tensor,
|
||||
representing a series of values in its domain. It is possible however to
|
||||
have higher rank values stored as a Scalar, as long as all entries have
|
||||
the same shape.
|
||||
|
||||
E.g.:
|
||||
|
||||
Scalar(np.float64)
|
||||
|
||||
Scalar field of type float32. Caffe2 will expect readers and
|
||||
datasets to expose it as a 1D tensor of doubles (vector), where
|
||||
the size of the vector is determined by this fields' domain.
|
||||
|
||||
Scalar((np.int32, 5))
|
||||
|
||||
Tensor field of type int32. Caffe2 will expect readers and
|
||||
datasets to implement it as a 2D tensor (matrix) of shape (L, 5),
|
||||
where L is determined by this fields' domain.
|
||||
|
||||
Scalar((str, (10, 20)))
|
||||
|
||||
Tensor field of type str. Caffe2 will expect readers and
|
||||
datasets to implement it as a 3D tensor of shape (L, 10, 20),
|
||||
where L is determined by this fields' domain.
|
||||
|
||||
If the field type is unknown at construction time, call Scalar(), that will
|
||||
default to np.void as its dtype.
|
||||
|
||||
It is an error to pass a structured dtype to Scalar, since it would contain
|
||||
more than one field. Instead, use from_dtype, which will construct
|
||||
a nested `Struct` field reflecting the given dtype's structure.
|
||||
"""
|
||||
def __init__(self, dtype=None):
|
||||
self._original_dtype = dtype
|
||||
self.dtype = np.dtype(dtype or np.void)
|
||||
assert not self.dtype.fields, (
|
||||
'Cannot create Scalar with a structured dtype. ' +
|
||||
'Use from_dtype instead.')
|
||||
Field.__init__(self, [])
|
||||
|
||||
def field_names(self):
|
||||
return ['']
|
||||
|
||||
def field_types(self):
|
||||
return [self.dtype]
|
||||
|
||||
def clone(self):
|
||||
return Scalar(self._original_dtype)
|
||||
|
||||
def id(self):
|
||||
"""
|
||||
Return the zero-indexed position of this scalar field in its schema.
|
||||
Used in order to index into the field_blob list returned by readers or
|
||||
accepted by writers.
|
||||
"""
|
||||
return self._child_base_id()
|
||||
|
||||
|
||||
def Map(keys, values, keys_name='keys', values_name='values'):
|
||||
"""A map is a List of Struct containing keys and values fields.
|
||||
Optionally, you can provide custom name for the key and value fields.
|
||||
"""
|
||||
return List(Struct((keys_name, keys), (values_name, values)))
|
||||
|
||||
|
||||
def from_dtype(dtype, _outer_shape=()):
|
||||
"""Constructs a Caffe2 schema from the given numpy's dtype.
|
||||
|
||||
Numpy supports scalar, array-like and structured datatypes, as long as
|
||||
all the shapes are fixed. This function breaks down the given dtype into
|
||||
a Caffe2 schema containing `Struct` and `Scalar` types.
|
||||
|
||||
Fields containing byte offsets are not currently supported.
|
||||
"""
|
||||
if not isinstance(dtype, np.dtype):
|
||||
# wrap into a ndtype
|
||||
shape = _outer_shape
|
||||
dtype = np.dtype((dtype, _outer_shape))
|
||||
else:
|
||||
# concatenate shapes if necessary
|
||||
shape = _outer_shape + dtype.shape
|
||||
if shape != dtype.shape:
|
||||
dtype = np.dtype((dtype.base, shape))
|
||||
|
||||
if not dtype.fields:
|
||||
return Scalar(dtype)
|
||||
|
||||
struct_fields = []
|
||||
for name, (fdtype, offset) in dtype.fields:
|
||||
assert offset == 0, ('Fields with byte offsets are not supported.')
|
||||
struct_fields += (name, from_dtype(fdtype, _outer_shape=shape))
|
||||
return Struct(*struct_fields)
|
||||
|
||||
|
||||
class _SchemaNode(object):
    """This is a private class used to represent a Schema Node"""
    def __init__(self, name, type_str=''):
        self.name = name
        self.children = []
        self.type_str = type_str
        self.field = None

    def add_child(self, name, type_str=''):
        for child in self.children:
            if child.name == name and child.type_str == type_str:
                return child
        child = _SchemaNode(name, type_str)
        self.children.append(child)
        return child

    def get_field(self):
        list_names = ['lengths', 'values']
        map_names = ['lengths', 'keys', 'values']

        if len(self.children) == 0 or self.field is not None:
            assert self.field is not None
            return self.field

        child_names = []
        for child in self.children:
            child_names.append(child.name)

        if set(child_names) == set(list_names):
            for child in self.children:
                if child.name == 'values':
                    self.field = List(child.get_field())
            self.type_str = "List"
            return self.field

        elif set(child_names) == set(map_names):
            for child in self.children:
                if child.name == 'keys':
                    key_field = child.get_field()
                elif child.name == 'values':
                    values_field = child.get_field()
            self.field = Map(key_field, values_field)
            self.type_str = "Map"
            return self.field

        else:
            struct_fields = []
            for child in self.children:
                if child.field is not None:
                    struct_fields.append((child.name, child.field))
                else:
                    struct_fields.append((child.name, child.get_field()))

            self.field = Struct(*struct_fields)
            self.type_str = "Struct"
            return self.field

    def print_recursively(self):
        for child in self.children:
            child.print_recursively()
        logger.info("Printing node: Name and type")
        logger.info(self.name)
        logger.info(self.type_str)

def from_column_list(column_names, column_types):
    root = _SchemaNode('root', 'Struct')
    for column_name, column_type in zip(column_names, column_types):
        columns = column_name.split(':')
        current = root
        for i in range(len(columns)):
            name = columns[i]
            type_str = ''
            field = None
            if i == len(columns) - 1:
                type_str = column_type
                field = Scalar(column_type)
            next = current.add_child(name, type_str)
            if field is not None:
                next.field = field
            current = next

    return root.get_field()
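
# As a sketch of the colon-separated naming convention this parser expects
# (column names and dtypes below are made up for illustration):
#
#     names = ['doc:id', 'doc:terms:lengths', 'doc:terms:values']
#     types = [np.int64, np.int32, np.float32]
#     schema = from_column_list(names, types)
#     # 'doc' becomes a Struct; its 'terms' child has exactly the
#     # {'lengths', 'values'} children, so get_field() folds it into a List.
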
caffe2/python/sparse_to_dense_mask_test.py (new file, 82 lines)
@ -0,0 +1,82 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase

import numpy as np


class TestSparseToDenseMask(TestCase):

    def test_sparse_to_dense_mask_float(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default', 'lengths'],
            ['output'],
            mask=[999999999, 2, 6])
        workspace.FeedBlob(
            'indices',
            np.array([2, 4, 6, 1, 2, 999999999, 2], dtype=np.int32))
        workspace.FeedBlob(
            'values',
            np.array([1, 2, 3, 4, 5, 6, 7], dtype=np.float))
        workspace.FeedBlob('default', np.array(-1, dtype=np.float))
        workspace.FeedBlob('lengths', np.array([3, 4], dtype=np.int32))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([[-1, 1, 3], [6, 7, -1]], dtype=np.float)
        self.assertEqual(output.shape, expected.shape)
        self.assertFalse(np.any(output - expected))

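    # Worked example (my reading of the operator from the expected values
    # above, not from its C++ source): mask=[999999999, 2, 6] assigns those
    # sparse ids to dense columns 0, 1, 2, and lengths=[3, 4] splits the
    # (indices, values) pairs into two rows. Row 0 sees (2->1), (4->2),
    # (6->3); id 4 is not in the mask and is dropped, giving [-1, 1, 3].
    # Row 1 sees (1->4), (2->5), (999999999->6), (2->7); id 1 is dropped
    # and the repeated id 2 keeps the last value, giving [6, 7, -1].
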
    def test_sparse_to_dense_mask_string(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default', 'lengths'],
            ['output'],
            mask=[999999999, 2, 6])
        workspace.FeedBlob(
            'indices',
            np.array([2, 4, 6, 1, 2, 999999999, 2], dtype=np.int32))
        workspace.FeedBlob(
            'values',
            np.array(['1', '2', '3', '4', '5', '6', '7'], dtype=np.str))
        workspace.FeedBlob('default', np.array('-1', dtype=np.str))
        workspace.FeedBlob('lengths', np.array([3, 4], dtype=np.int32))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([['-1', '1', '3'], ['6', '7', '-1']], dtype=np.str)
        self.assertEqual(output.shape, expected.shape)
        self.assertTrue(np.all(np.equal(output, expected)))

    def test_sparse_to_dense_mask_empty_lengths(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default', 'lengths'],
            ['output'],
            mask=[1, 2, 6])
        workspace.FeedBlob('indices', np.array([2, 4, 6], dtype=np.int32))
        workspace.FeedBlob('values', np.array([1, 2, 3], dtype=np.float))
        workspace.FeedBlob('default', np.array(-1, dtype=np.float))
        workspace.FeedBlob('lengths', np.array([], dtype=np.int32))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([-1, 1, 3], dtype=np.float)
        self.assertEqual(output.shape, expected.shape)
        self.assertFalse(np.any(output - expected))

    def test_sparse_to_dense_mask_no_lengths(self):
        op = core.CreateOperator(
            'SparseToDenseMask',
            ['indices', 'values', 'default'],
            ['output'],
            mask=[1, 2, 6])
        workspace.FeedBlob('indices', np.array([2, 4, 6], dtype=np.int32))
        workspace.FeedBlob('values', np.array([1, 2, 3], dtype=np.float))
        workspace.FeedBlob('default', np.array(-1, dtype=np.float))
        workspace.RunOperatorOnce(op)
        output = workspace.FetchBlob('output')
        expected = np.array([-1, 1, 3], dtype=np.float)
        self.assertEqual(output.shape, expected.shape)
        self.assertFalse(np.any(output - expected))
@ -6,10 +6,18 @@ import shutil
import socket
import tempfile

import numpy as np
from caffe2.proto import caffe2_pb2
from caffe2.python import scope, utils
from ._import_c_extension import *  # noqa

# Python 2 and 3 compatibility: test if basestring exists
try:
    basestring  # NOQA
except NameError:
    # This is python3 so we define basestring.
    basestring = str


def _GetFreeFlaskPort():
    """Get a free flask port."""
@ -86,7 +94,9 @@ def ResetWorkspace(root_folder=None):
    return cc_ResetWorkspace(root_folder)


def CreateNet(net, input_blobs=[]):
def CreateNet(net, input_blobs=None):
    if input_blobs is None:
        input_blobs = []
    for input_blob in input_blobs:
        CreateBlob(input_blob)
    return cc_CreateNet(StringfyProto(net))
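
The switch from input_blobs=[] to input_blobs=None sidesteps Python's
shared-mutable-default pitfall: a default list is created once at function
definition time and reused across every call. A minimal standalone
illustration (not Caffe2-specific):

    def buggy(x, acc=[]):       # one list, shared by every call
        acc.append(x)
        return acc

    buggy(1)                    # [1]
    buggy(2)                    # [1, 2] -- state leaked between calls

    def fixed(x, acc=None):
        if acc is None:
            acc = []            # fresh list per call
        acc.append(x)
        return acc
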
@ -112,6 +122,14 @@ def RunPlan(plan):
    return cc_RunPlan(StringfyProto(plan))


def _StringifyBlobName(name):
    if isinstance(name, basestring):
        return name
    assert type(name).__name__ == 'BlobReference', \
        "Expected a string or BlobReference"
    return str(name)


def FeedBlob(name, arr, device_option=None):
    """Feeds a blob into the workspace.
@ -125,6 +143,10 @@ def FeedBlob(name, arr, device_option=None):
    """
    if type(arr) is caffe2_pb2.TensorProto:
        arr = utils.Caffe2TensorToNumpyArray(arr)
    if type(arr) is np.ndarray and arr.dtype.kind == 'S':
        # Plain NumPy strings are weird, let's use objects instead
        arr = arr.astype(np.object)
    name = _StringifyBlobName(name)
    if device_option is not None:
        return cc_FeedBlob(name, arr, StringfyProto(device_option))
    elif scope.DEVICESCOPE is not None:
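
For context, a small numpy-only illustration of the dtype normalization
applied above (my example; the motivation suggested by the string tests
below is round-tripping strings that contain '\0' bytes):

    import numpy as np

    a = np.array([b'ab', b'cde'])   # dtype kind 'S': fixed-width byte strings
    a = a.astype(object)            # object array of Python byte strings,
                                    # with no fixed-width truncation rules
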
@ -133,6 +155,40 @@ def FeedBlob(name, arr, device_option=None):
    return cc_FeedBlob(name, arr)


def FetchBlob(name):
    """Fetches a blob from the workspace.

    Inputs:
        name: the name of the blob - a string or a BlobReference
    Returns:
        Fetched blob (numpy array or string) if successful
    """
    name = _StringifyBlobName(name)
    return cc_FetchBlob(name)


class _BlobDict(object):
    """Provides a Python dict compatible way to do fetching and feeding"""

    def __getitem__(self, key):
        return FetchBlob(key)

    def __setitem__(self, key, value):
        return FeedBlob(key, value)

    def __len__(self):
        return len(Blobs())

    def __iter__(self):
        return Blobs().__iter__()

    def __contains__(self, item):
        return HasBlob(item)


blobs = _BlobDict()


class Model(object):
    def __init__(self, net, parameters, inputs, outputs, device_option=None):
        """Initializes a model.
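
A sketch of the dict-style access this enables (blob name illustrative;
this mirrors the testFetchFeedViaBlobDict test added below):

    workspace.blobs['x'] = np.ones((2, 3), dtype=np.float32)  # FeedBlob
    x = workspace.blobs['x']                                  # FetchBlob
    'x' in workspace.blobs                                    # HasBlob
    len(workspace.blobs)                                      # len(Blobs())
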
@ -8,7 +8,8 @@ from caffe2.python import core, test_util, workspace
class TestWorkspace(unittest.TestCase):
    def setUp(self):
        self.net = core.Net("test-net")
        self.net.ConstantFill([], "testblob", shape=[1, 2, 3, 4], value=1.0)
        self.testblob_ref = self.net.ConstantFill(
            [], "testblob", shape=[1, 2, 3, 4], value=1.0)
        workspace.ResetWorkspace()

    def testRootFolder(self):
@ -64,6 +65,20 @@ class TestWorkspace(unittest.TestCase):
        self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
        np.testing.assert_array_equal(fetched_again, 2.0)

    def testFetchFeedBlobViaBlobReference(self):
        self.assertEqual(
            workspace.RunNetOnce(self.net.Proto().SerializeToString()), True)
        fetched = workspace.FetchBlob(self.testblob_ref)
        # check if fetched is correct.
        self.assertEqual(fetched.shape, (1, 2, 3, 4))
        np.testing.assert_array_equal(fetched, 1.0)
        fetched[:] = 2.0
        self.assertEqual(workspace.FeedBlob(self.testblob_ref, fetched), True)
        fetched_again = workspace.FetchBlob("testblob")  # fetch by name now
        self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
        np.testing.assert_array_equal(fetched_again, 2.0)

    def testFetchFeedBlobTypes(self):
        for dtype in [np.float16, np.float32, np.float64, np.bool,
                      np.int8, np.int16, np.int32, np.int64,
@ -101,7 +116,8 @@ class TestWorkspace(unittest.TestCase):
        strs = np.array([
            ' '.join(10 * ['long string']),
            ' '.join(128 * ['very long string']),
            'small string'])
            'small \0\1\2 string',
            "Hello, world! I have special \0 symbols \1!"])
        workspace.FeedBlob('my_str_tensor', strs)
        strs2 = workspace.FetchBlob('my_str_tensor')
        self.assertEqual(strs.shape, strs2.shape)
@ -117,6 +133,32 @@ class TestWorkspace(unittest.TestCase):
        for i in range(0, strs.shape[0]):
            self.assertEqual(strs[i], strs2[i])

    def testFetchFeedPlainString(self):
        # this is an actual string, not a tensor of strings
        s = "Hello, world! I have special \0 symbols \1!"
        workspace.FeedBlob('my_plain_string', s)
        s2 = workspace.FetchBlob('my_plain_string')
        self.assertEqual(s, s2)

    def testFetchFeedViaBlobDict(self):
        self.assertEqual(
            workspace.RunNetOnce(self.net.Proto().SerializeToString()), True)
        fetched = workspace.blobs["testblob"]
        # check if fetched is correct.
        self.assertEqual(fetched.shape, (1, 2, 3, 4))
        np.testing.assert_array_equal(fetched, 1.0)
        fetched[:] = 2.0
        workspace.blobs["testblob"] = fetched
        fetched_again = workspace.blobs["testblob"]
        self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
        np.testing.assert_array_equal(fetched_again, 2.0)

        self.assertTrue("testblob" in workspace.blobs)
        self.assertFalse("non_existent" in workspace.blobs)
        self.assertEqual(len(workspace.blobs), 1)
        for key in workspace.blobs:
            self.assertEqual(key, "testblob")


class TestMultiWorkspaces(unittest.TestCase):
    def setUp(self):
@ -14,7 +14,8 @@ void adagrad_update(
    float epsilon,
    const float* lr,
    Context* context) {
#pragma omp parallel for
  // TODO(cxj): use OMP when it is reliable
  // #pragma omp parallel for
  for (auto i = 0; i < N; ++i) {
    float gi = g[i];
    float hi = nh[i] = h[i] + gi * gi;
@ -78,8 +79,8 @@ class SparseAdagradOp final : public Operator<Context> {
    const auto* momentIn = Input(MOMENT_1).template data<T>();
    auto* gradOut = Output(OUTPUT_GRAD)->template mutable_data<T>();
    auto* momentOut = Output(OUTPUT_MOMENT_1)->template mutable_data<T>();

#pragma omp parallel for
    // TODO(cxj): use OMP when it is reliable
    // #pragma omp parallel for
    for (auto i = 0; i < n; ++i) {
      auto idx = indices[i];
      if (block_size == 1) {
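
For reference, the AdaGrad step these kernels implement, sketched in Python
(my transcription: the h-update matches the visible `nh[i] = h[i] + gi * gi`;
the weight update and sign convention follow the standard algorithm and
should be checked against the full source):

    import numpy as np

    def adagrad_reference(w, g, h, lr, epsilon):
        h_new = h + g * g                                # accumulated squared gradient
        w_new = w + lr * g / (np.sqrt(h_new) + epsilon)  # per-coordinate scaled step
        return w_new, h_new
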
@ -41,7 +41,8 @@ void ftrl_update(
    T* new_nz,
    const FtrlParams<T>& params,
    Context* context) {
#pragma omp parallel for
  // TODO(cxj): use OMP when it is reliable
  // #pragma omp parallel for
  for (auto i = 0; i < N; ++i) {
    ftrl_compute(
        w[i],

@ -93,7 +94,9 @@ void SparseFtrlOp<T>::DoRun() {
  T* nz = n_z->template mutable_data<T>();
  const SIndex* idxs = indices.template data<SIndex>();
  const T* g = grad.template data<T>();
#pragma omp parallel for

  // TODO(cxj): use OMP when it is reliable
  // #pragma omp parallel for
  for (TIndex i = 0; i < K; ++i) {
    SIndex idx = idxs[i];
    DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx
@ -11,9 +11,11 @@
// platforms, it allows one to quickly port Caffe2 to different platforms
// where BLAS may not be present.

#include <random>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <random>

#ifdef CAFFE2_USE_MKL
#include <mkl.h>

@ -486,6 +488,7 @@ void Set<T, CPUContext>(const int N, const T alpha, T *Y, \
CAFFE2_SPECIALIZED_SET(float);
CAFFE2_SPECIALIZED_SET(double);
CAFFE2_SPECIALIZED_SET(int);
CAFFE2_SPECIALIZED_SET(bool);
#undef CAFFE2_SPECIALIZED_SET

#define CAFFE2_INSTANTIATE_BINARY_OP(name, op, T) \
caffe2/utils/string_utils.cc (new file, 14 lines)
@ -0,0 +1,14 @@
#include "caffe2/utils/string_utils.h"

namespace caffe2 {

std::vector<std::string> split(char separator, const std::string& string) {
  std::vector<std::string> pieces;
  std::stringstream ss(string);
  std::string item;
  while (getline(ss, item, separator)) {
    pieces.push_back(std::move(item));
  }
  return pieces;
}
}
caffe2/utils/string_utils.h (new file, 10 lines)
@ -0,0 +1,10 @@
#pragma once

#include <sstream>
#include <string>
#include <vector>

namespace caffe2 {

std::vector<std::string> split(char separator, const std::string& string);
}