mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[caffe2] EnforceFinite: log blobs finiteness in workspace on error (#52892)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/52892 When an EnforceFinite check fails, this logs all of the tensors in the workspace and whether they are finite or not. This is a little bit hacky since it uses the aten APIs. I've `ifdef`ed the implementation so it should compile fine on xplat and mobile. It's also accessing the workspace directly, but since this is a logging op, it seems fine to bend the rules. Test Plan: $ buck test //caffe2/caffe2/python/operator_test:enforce_finite_op_test $ buck-out/gen/caffe2/caffe2/python/operator_test/enforce_finite_op_test#binary.par I0225 16:29:46.166507 311548 enforce_finite_op.h:62] blob X isfinite=false Reviewed By: dzhulgakov Differential Revision: D26626336 fbshipit-source-id: f68e219b910a7242f2e72bb4d734c3e84f46eec5
This commit is contained in:
parent
10087337c7
commit
94e23e51c4
|
|
@ -13,8 +13,8 @@ class EnforceFiniteOp final : public Operator<Context> {
|
|||
public:
|
||||
USE_OPERATOR_CONTEXT_FUNCTIONS;
|
||||
template <class... Args>
|
||||
explicit EnforceFiniteOp(Args&&... args)
|
||||
: Operator<Context>(std::forward<Args>(args)...) {}
|
||||
explicit EnforceFiniteOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws), ws_(ws) {}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
|
||||
|
|
@ -24,6 +24,7 @@ class EnforceFiniteOp final : public Operator<Context> {
|
|||
bool DoRunWithType();
|
||||
|
||||
private:
|
||||
Workspace* ws_;
|
||||
Tensor buffer_{CPU};
|
||||
|
||||
template <typename T>
|
||||
|
|
@ -32,14 +33,40 @@ class EnforceFiniteOp final : public Operator<Context> {
|
|||
auto size = input.numel();
|
||||
|
||||
for (auto i = 0; i < size; i++) {
|
||||
auto isfinite = std::isfinite(input_data[i]);
|
||||
if (!isfinite) {
|
||||
LogBlobFiniteness();
|
||||
}
|
||||
CAFFE_ENFORCE_FINITE(
|
||||
std::isfinite(input_data[i]),
|
||||
isfinite,
|
||||
"Index ",
|
||||
i,
|
||||
" is not finite (e.g., NaN, Inf): ",
|
||||
input_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// LogBlobFiniteness walks every blob name reported by ws_->Blobs() and, for
// each blob that holds a Tensor, logs whether the SUM of that tensor is finite.
// It is invoked from the element check loop when a non-finite value is found.
// NOTE(review): only the sum is tested, not each element; a sum can presumably
// be non-finite (overflow) even when every element is finite -- confirm this
// approximation is acceptable for a diagnostic log.
|
||||
void LogBlobFiniteness() {
|
||||
// This uses the aten interfaces to compute the sum and finiteness of the
|
||||
// tensors which are not present by default on xplat and mobile builds.
|
||||
// Outside the guarded region the function body is empty, so the call is a
// no-op in builds where the condition below is false. `&&` binds tighter
// than `||`: EXPOSE_C2_OPS || (!CAFFE2_IS_XPLAT_BUILD && !C10_MOBILE).
#if defined(EXPOSE_C2_OPS) || \
|
||||
!defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
||||
for (const std::string& blob_name : ws_->Blobs()) {
|
||||
// Per-blob try/catch: a std::exception raised while inspecting one blob is
// logged at ERROR level and the loop moves on to the next blob.
try {
|
||||
const auto& blob = ws_->GetBlob(blob_name);
|
||||
// Blobs that are null or do not hold a Tensor are skipped without logging.
if (blob != nullptr && blob->IsType<Tensor>()) {
|
||||
Tensor* c2Tensor = blob->GetMutable<Tensor>();
|
||||
// View the caffe2 Tensor through the aten interface.
const at::Tensor& tensor = static_cast<at::Tensor>(*c2Tensor);
|
||||
// Reduce with sum(), test that result with isfinite(), bring it to CPU
// via cpu() and read the first bool element.
bool blob_finite = tensor.sum().isfinite().cpu().data_ptr<bool>()[0];
|
||||
LOG(INFO) << "blob " << blob_name << " isfinite=" << (blob_finite ? "true" : "false");
|
||||
}
|
||||
} catch (const std::exception& ex) {
|
||||
LOG(ERROR) << "failed to check finiteness for " << blob_name << ": " << ex.what();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user