mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
quick fix inplace blob bug
Summary: Fixes the case where the init net would initialize the same blob twice. An exception is made to allow an in-place blob across ops, provided the blob stays on the same device. This fixes the problem in a generalized way, since most of our training currently runs on CPU only. Reviewed By: dzhulgakov Differential Revision: D5450564 fbshipit-source-id: 525c4c9a2e5216a70dbd1229da2d9f8a58b89e47
This commit is contained in:
parent
920c553ac0
commit
b51e0ec0c2
|
|
@@ -2090,6 +2090,11 @@ def copy_func_between_devices(src, dst):


 def device_equal(src, dst):
+    '''
+    We are using this function instead of == operator because optional-value
+    comparison between empty device_options and {device_type:0, cuda_gpu_id:0}
+    returns not equal in some cases.
+    '''
     return src.device_type == dst.device_type and src.cuda_gpu_id == dst.cuda_gpu_id


@@ -2182,12 +2187,17 @@ def InjectCrossDeviceCopies(net, blob_to_device=None):
         # Enforcing no reuse blob between operators. In-place blob usage in an
         # op is allowed. This is based on the assumption that in-place op has
         # same device info
-        for out_blob in op.output:
-            if out_blob in blob_to_device and out_blob not in op.input:
+        for out_blob, device in zip(op.output, output_dev):
+            if out_blob in blob_to_device and (
+                out_blob not in op.input and
+                not device_equal(blob_to_device[out_blob], device)
+            ):
                 raise RuntimeError(
-                    "In-place blob: {} is not supported between operators. "
-                    "Failed op:\n {}".
-                    format(out_blob, op)
+                    "In-place blob: {} is not supported between operators "
+                    "with different device option previous:{} now: {}. "
+                    "Failed op:\n {}".format(
+                        out_blob, blob_to_device[out_blob], device, op
+                    )
                 )
         blob_to_device.update({o: d for d, o in zip(output_dev, op.output)})
         new_op = caffe2_pb2.OperatorDef()
@@ -787,6 +787,10 @@ external_input: "data"
         self.assertEqual(op.input[0], 'param_cuda_1')

         net.Relu('nonsense_input', 'moment')
+        # should not raise inplace error
+        core.InjectCrossDeviceCopies(net)
+        with core.DeviceScope(device_option):
+            net.Relu('nonsense_input_gpu', 'moment')
         with self.assertRaises(RuntimeError):
             core.InjectCrossDeviceCopies(net)
Loading…
Reference in New Issue
Block a user