Clean up naming of FP16-related code, add comments
parent b07980334c
commit ed05ca9fec
@@ -614,7 +614,7 @@ def main():
                         help='Data type used for training')
     parser.add_argument('--float16_compute', action='store_true',
                         help="Use float 16 compute, if available")
-    parser.add_argument('--enable-tensor-core', action='store_true',
+    parser.add_argument('--enable_tensor_core', action='store_true',
                         help='Enable Tensor Core math for Conv and FC ops')
     parser.add_argument("--distributed_transport", type=str, default="tcp",
                         help="Transport to use for distributed run [tcp|ibverbs]")
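Note on the hunk above: argparse converts dashes in an option string to underscores when it derives the attribute name, so both --enable-tensor-core and --enable_tensor_core are read back as args.enable_tensor_core; the rename only makes the flag's spelling consistent with the parser's other underscore-style flags. A minimal, self-contained sketch (this toy parser is illustrative, not the trainer's actual argument set):

import argparse

# Illustrative parser, not the trainer's real argument set.
parser = argparse.ArgumentParser()
parser.add_argument('--enable_tensor_core', action='store_true',
                    help='Enable Tensor Core math for Conv and FC ops')

args = parser.parse_args(['--enable_tensor_core'])
print(args.enable_tensor_core)  # True; the dest is derived from the flag name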
@@ -74,8 +74,16 @@ class ExternalInitializer(object):
         )


-class pFP16Initializer(Initializer):
+class PseudoFP16Initializer(Initializer):
+    '''
+    Used in cases when the parameter should be used at half (16-bit) precision
+    for compute purposes (i.e. on the forward and backward pass) but
+    needs to be stored and optimized at single (32-bit) precision so tiny
+    gradients with small learning rates don't underflow FP16 precision.
+    A 32-bit copy of the 16-bit blob is stored in the ParameterInfo.
+    This is helpful for mixed-precision training, see
+    https://arxiv.org/abs/1710.03740 for details.
+    '''
     def update(self, operator_name, kwargs):
         if self.operator_name is not None:
             raise Exception("Operator name overwrites are not allowed")
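The new docstring describes the usual mixed-precision arrangement: compute in half precision, but keep a single-precision master copy of the parameter so small optimizer steps are not rounded away. A minimal NumPy sketch of the underflow the 32-bit copy guards against (the numbers are chosen purely for illustration):

import numpy as np

update = np.float16(1e-4)          # a small step, e.g. lr * gradient

# FP16 storage: near 1.0 the spacing between float16 values is ~1e-3,
# so adding 1e-4 rounds straight back to 1.0 and the step is lost.
w_fp16 = np.float16(1.0) + update
print(w_fp16)                      # 1.0

# FP32 master copy: the same step survives, and a rounded FP16 view
# can be re-derived from it for the forward/backward pass.
w_fp32 = np.float32(1.0) + np.float32(update)
print(w_fp32)                      # 1.0001
print(w_fp32.astype(np.float16))   # FP16 copy used for compute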
@@ -99,8 +107,12 @@ class pFP16Initializer(Initializer):
         )


-class ReversepFP16Initializer(Initializer):
+class ReversePseudoFP16Initializer(Initializer):
+    '''
+    Like PseudoFP16Initializer above, except the primary blob is taken to
+    be the 32-bit precision parameter, and the 16-bit version of the blob
+    is stored in blob_copy instead.
+    '''
     def update(self, operator_name, kwargs):
         if self.operator_name is not None:
             raise Exception("Operator name overwrites are not allowed")
@@ -21,7 +21,7 @@ from __future__ import unicode_literals
 import unittest
 from caffe2.python import brew, model_helper, workspace
 from caffe2.python.modeling.initializers import (
-    Initializer, pFP16Initializer)
+    Initializer, PseudoFP16Initializer)


 class InitializerTest(unittest.TestCase):
@@ -52,15 +52,15 @@ class InitializerTest(unittest.TestCase):
         data = model.net.AddExternalInput("data")
         fc1 = brew.fc(model, data, "fc1", dim_in=1, dim_out=1)

-        # default operator, pFP16Initializer
+        # default operator, PseudoFP16Initializer
         fc2 = brew.fc(model, fc1, "fc2", dim_in=1, dim_out=1,
-                      WeightInitializer=pFP16Initializer
+                      WeightInitializer=PseudoFP16Initializer
                       )

-        # specified operator, pFP16Initializer
+        # specified operator, PseudoFP16Initializer
         fc3 = brew.fc(model, fc2, "fc3", dim_in=1, dim_out=1,
                       weight_init=("ConstantFill", {}),
-                      WeightInitializer=pFP16Initializer
+                      WeightInitializer=PseudoFP16Initializer
                       )

     def test_fc_external_initializer(self):
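For reference, the renamed initializer is passed to brew layers exactly as the test does above; a minimal sketch along those lines (the model name and layer sizes are arbitrary, and running it requires a Caffe2 build):

from caffe2.python import brew, model_helper
from caffe2.python.modeling.initializers import PseudoFP16Initializer

# Toy model; the name and dimensions are placeholders.
model = model_helper.ModelHelper(name="fp16_example")
data = model.net.AddExternalInput("data")

# The FC weight is created through PseudoFP16Initializer, so a 32-bit
# master copy backs the 16-bit blob used for compute.
fc = brew.fc(model, data, "fc", dim_in=4, dim_out=2,
             WeightInitializer=PseudoFP16Initializer)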
@@ -25,7 +25,7 @@ import numpy as np
 from caffe2.python import brew, core, workspace, cnn, optimizer
 from caffe2.proto import caffe2_pb2
 from caffe2.python.modeling.initializers import (
-    Initializer, pFP16Initializer)
+    Initializer, PseudoFP16Initializer)

 from caffe2.python.model_helper import ModelHelper

@@ -41,7 +41,8 @@ class OptimizerTestBase(object):
         perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
         np.random.seed(123)  # make test deterministic
         numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
-        initializer = Initializer if dtype == core.DataType.FLOAT else pFP16Initializer
+        initializer = Initializer if dtype == core.DataType.FLOAT else \
+            PseudoFP16Initializer
         data = np.random.randint(
             2,
             size=(20, perfect_model.size)).astype(numpy_dtype)
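The re-wrapped conditional above just picks the NumPy dtype and parameter initializer that match the requested Caffe2 data type; a hypothetical table-driven spelling of the same selection, for comparison:

import numpy as np
from caffe2.python import core
from caffe2.python.modeling.initializers import (
    Initializer, PseudoFP16Initializer)

# Hypothetical helper, not part of the test: map a Caffe2 data type to
# the NumPy dtype and initializer used at that precision.
_DTYPE_CONFIG = {
    core.DataType.FLOAT: (np.float32, Initializer),
    core.DataType.FLOAT16: (np.float16, PseudoFP16Initializer),
}

numpy_dtype, initializer = _DTYPE_CONFIG[core.DataType.FLOAT16]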