Clean up naming of FP16-related code, add comments

Kutta Srinivasan 2018-03-08 11:54:33 -08:00 committed by kuttas
parent b07980334c
commit ed05ca9fec
4 changed files with 25 additions and 12 deletions

View File

@@ -614,7 +614,7 @@ def main():
help='Data type used for training')
parser.add_argument('--float16_compute', action='store_true',
help="Use float 16 compute, if available")
-parser.add_argument('--enable-tensor-core', action='store_true',
+parser.add_argument('--enable_tensor_core', action='store_true',
help='Enable Tensor Core math for Conv and FC ops')
parser.add_argument("--distributed_transport", type=str, default="tcp",
help="Transport to use for distributed run [tcp|ibverbs]")

View File

@@ -74,8 +74,16 @@ class ExternalInitializer(object):
)
-class pFP16Initializer(Initializer):
+class PseudoFP16Initializer(Initializer):
+'''
+Used in cases when the parameter should be used at half (16-bit) precision
+for compute purposes (i.e. on the forward and backward pass) but
+needs to be stored and optimized at single (32-bit) precision so tiny
+gradients with small learning rates don't underflow FP16 precision.
+A 32-bit copy of the 16-bit blob is stored in the ParameterInfo.
+This is helpful for mixed-precision training, see
+https://arxiv.org/abs/1710.03740 for details.
+'''
def update(self, operator_name, kwargs):
if self.operator_name is not None:
raise Exception("Operator name overwrites are not allowed")
@@ -99,8 +107,12 @@ class pFP16Initializer(Initializer):
)
-class ReversepFP16Initializer(Initializer):
+class ReversePseudoFP16Initializer(Initializer):
+'''
+Like PseudoFP16Initializer above, except the primary blob is taken to
+be the 32-bit precision parameter, and the 16-bit version of the blob
+is stored in blob_copy instead.
+'''
def update(self, operator_name, kwargs):
if self.operator_name is not None:
raise Exception("Operator name overwrites are not allowed")

View File

@@ -21,7 +21,7 @@ from __future__ import unicode_literals
import unittest
from caffe2.python import brew, model_helper, workspace
from caffe2.python.modeling.initializers import (
-Initializer, pFP16Initializer)
+Initializer, PseudoFP16Initializer)
class InitializerTest(unittest.TestCase):
@@ -52,15 +52,15 @@ class InitializerTest(unittest.TestCase):
data = model.net.AddExternalInput("data")
fc1 = brew.fc(model, data, "fc1", dim_in=1, dim_out=1)
-# default operator, pFP16Initializer
+# default operator, PseudoFP16Initializer
fc2 = brew.fc(model, fc1, "fc2", dim_in=1, dim_out=1,
-WeightInitializer=pFP16Initializer
+WeightInitializer=PseudoFP16Initializer
)
-# specified operator, pFP16Initializer
+# specified operator, PseudoFP16Initializer
fc3 = brew.fc(model, fc2, "fc3", dim_in=1, dim_out=1,
weight_init=("ConstantFill", {}),
-WeightInitializer=pFP16Initializer
+WeightInitializer=PseudoFP16Initializer
)
def test_fc_external_initializer(self):

View File

@@ -25,7 +25,7 @@ import numpy as np
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.proto import caffe2_pb2
from caffe2.python.modeling.initializers import (
-Initializer, pFP16Initializer)
+Initializer, PseudoFP16Initializer)
from caffe2.python.model_helper import ModelHelper
@@ -41,7 +41,8 @@ class OptimizerTestBase(object):
perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
np.random.seed(123) # make test deterministic
numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
-initializer = Initializer if dtype == core.DataType.FLOAT else pFP16Initializer
+initializer = Initializer if dtype == core.DataType.FLOAT else \
+    PseudoFP16Initializer
data = np.random.randint(
2,
size=(20, perfect_model.size)).astype(numpy_dtype)